#include <util/digest/fnv.h>
#include <util/generic/hash_set.h>
#include <util/generic/size_literals.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/wmcutil/hostid.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>
#include <robot/library/yt/static/command.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/processors/tools/IKS/utils/canonizer.h>
#include <wmconsole/version3/processors/tools/host2vec/utils/utils.h>
#include <wmconsole/version3/processors/user_sessions/library/regions_limiter.h>
#include <library/cpp/compute_graph/compute_graph.h>
#include <wmconsole/version3/processors/user_sessions/niche2/conf/config.h>
#include <wmconsole/version3/processors/user_sessions/niche2/preparation/tables.pb.h>
#include <robot/library/yt/static/tags.h>
#include <library/cpp/text_processing/tokenizer/tokenizer.h>
#include <wmconsole/version3/wmcutil/yt/misc.h>
#include <util/string/vector.h>
#include <wmconsole/version3/library/dssm/dssm_utils.h>
#include <wmconsole/version3/processors/user_sessions/library/utils.h>
#include <wmconsole/version3/processors/user_sessions/protos/user_sessions.pb.h>

#include <wmconsole/version3/processors/user_sessions/niche/conf/config.h>

#include "task_prepare_queries.h"

namespace NWebmaster {
namespace NNiche2 {
using namespace NJupiter;
using namespace NProto;

// User-session rows with a position strictly below this bound go to the
// E_USER_SESSION_GOOD segment, the rest to E_USER_SESSION_BAD
// (see TNormalizeOriginalQueriesMapper).
static const int FIRST_SEGMENT_POSITION_UPPER_BOUND = 10;
// Minimum number of distinct UIds a normalized query needs to be kept (see TRemoveTailReducer).
static const int QUERY_TO_UIDS_LOWER_BOUND = 10;
// Minimum number of distinct corrected queries a domain needs to be kept
// (see TRemoveTailDomainToQueryReducer).
static const int DOMAIN_TO_QUERY_LOWER_BOUND = 10;
// We treat a dwell time of an hour or more as a signal (flagged as "infinite" rather than
// averaged, see TCalculateAllReducer). NOTE(review): the value is presumably in seconds — confirm.
static const int INFINITE_DWELL_TIME_ON_SERVICE = 60 * 60;

// Not referenced in the visible part of this file; judging by the name, a cap on the number of
// third-segment queries kept after sampling — TODO confirm at the use site.
static const uint64_t THIRD_SEGMENT_QUIERIES_AFTER_SAMPLING_MAX_COUNT = 7000;

static const THashSet<TString> PREPOSITIONS{"c","а","алло","без","белый","близко","более","больше","большой","будем","будет","будете","будешь","будто","буду","будут","будь","бы","бывает","бывь","был","была","были","было","быть","в","важная","важное","важные","важный","вам","вами","вас","ваш","ваша","ваше","ваши","вверх","вдали","вдруг","ведь","везде","вернуться","весь","вечер","взгляд","взять","вид","видел","видеть","вместе","вне","вниз","внизу","во","вода","война","вокруг","вон","вообще","вопрос","восемнадцатый","восемнадцать","восемь","восьмой","вот","впрочем","времени","время","все","все еще","всегда","всего","всем","всеми","всему","всех","всею","всю","всюду","вся","всё","второй","вы","выйти","г","где","главный","глаз","говорил","говорит","говорить","год","года","году","голова","голос","город","да","давать","давно","даже","далекий","далеко","дальше","даром","дать","два","двадцатый","двадцать","две","двенадцатый","двенадцать","дверь","двух","девятнадцатый","девятнадцать","девятый","девять","действительно","дел","делал","делать","делаю","дело","день","деньги","десятый","десять","для","до","довольно","долго","должен","должно","должный","дом","дорога","друг","другая","другие","других","друго","другое","другой","думать","душа","е","его","ее","ей","ему","если","есть","еще","ещё","ею","её","ж","ждать","же","жена","женщина","жизнь","жить","за","занят","занята","занято","заняты","затем","зато","зачем","здесь","земля","знать","значит","значить","и","иди","идти","из","или","им","имеет","имел","именно","иметь","ими","имя","иногда","их","к","каждая","каждое","каждые","каждый","кажется","казаться","как","какая","какой","кем","книга","когда","кого","ком","комната","кому","конец","конечно","которая","которого","которой","которые","который","которых","кроме","кругом","кто","куда","лежать","лет","ли","лицо","лишь","лучше","любить","люди","м","маленький","мало","мать","машина","между","меля","менее","меньше","меня","место","миллионов","мимо","минута","мир","мира","мне","много","мно
гочисленная","многочисленное","многочисленные","многочисленный","мной","мною","мог","могу","могут","мож","может","может быть","можно","можхо","мои","мой","мор","москва","мочь","моя","моё","мы","на","наверху","над","надо","назад","наиболее","найти","наконец","нам","нами","народ","нас","начала","начать","наш","наша","наше","наши","не","него","недавно","недалеко","нее","ней","некоторый","нельзя","нем","немного","нему","непрерывно","нередко","несколько","нет","нею","неё","ни","нибудь","ниже","низко","никакой","никогда","никто","никуда","ним","ними","них","ничего","ничто","но","новый","нога","ночь","ну","нужно","нужный","нх","о","об","оба","обычно","один","одиннадцатый","одиннадцать","однажды","однако","одного","одной","оказаться","окно","около","он","она","они","оно","опять","особенно","остаться","от","ответить","отец","откуда","отовсюду","отсюда","очень","первый","перед","писать","плечо","по","под","подойди","подумать","пожалуйста","позже","пойти","пока","пол","получить","помнить","понимать","понять","пор","пора","после","последний","посмотреть","посреди","потом","потому","почему","почти","правда","прекрасно","при","про","просто","против","процентов","путь","пятнадцатый","пятнадцать","пятый","пять","работа","работать","раз","разве","рано","раньше","ребенок","решить","россия","рука","русский","ряд","рядом","с","с 
кем","сам","сама","сами","самим","самими","самих","само","самого","самой","самом","самому","саму","самый","свет","свое","своего","своей","свои","своих","свой","свою","сделать","сеаой","себе","себя","сегодня","седьмой","сейчас","семнадцатый","семнадцать","семь","сидеть","сила","сих","сказал","сказала","сказать","сколько","слишком","слово","случай","смотреть","сначала","снова","со","собой","собою","советский","совсем","спасибо","спросить","сразу","стал","старый","стать","стол","сторона","стоять","страна","суть","считать","т","та","так","такая","также","таки","такие","такое","такой","там","твои","твой","твоя","твоё","те","тебе","тебя","тем","теми","теперь","тех","то","тобой","тобою","товарищ","тогда","того","тоже","только","том","тому","тот","тою","третий","три","тринадцатый","тринадцать","ту","туда","тут","ты","тысяч","у","увидеть","уж","уже","улица","уметь","утро","хороший","хорошо","хотел бы","хотеть","хоть","хотя","хочешь","час","часто","часть","чаще","чего","человек","чем","чему","через","четвертый","четыре","четырнадцатый","четырнадцать","что","чтоб","чтобы","чуть","шестнадцатый","шестнадцать","шестой","шесть","эта","эти","этим","этими","этих","это","этого","этой","этом","этому","этот","эту","я","являюсь"};

// Input table tags used by the tagged mappers/reducers in this file. Each tag binds a row
// (protobuf) type to a numeric id. Several tags may share a row type (e.g. the two TQuery
// sources, or the two TQueryToCluster sources) and are told apart by id only.
static const TInputTag<NUserSessions::NProto::TQuery> UserSessionInputTag(1);
static const TInputTag<NUserSessions::NProto::TQuery> PrsInputTag(2);
static const TInputTag<TPreparedQueries> PreparedQueriesInputTag(3);
static const TInputTag<TQueryToCluster> SearchshareQueryToClusterInputTag(4);
// Only referenced from commented-out code in the visible part of this file.
static const TInputTag<TQueryToCluster> SpikesQueryToClusterInputTag(44);
static const TInputTag<TIntermediateQueries> IntermediateInputTag(5);
static const TInputTag<TQueryToUId> QueryToUIdInputTag(6);
static const TInputTag<TQueryToUIdCount> QueryToUIdCountInputTag(7);
static const TInputTag<TDomainToQuery> DomainToQueryInputTag(8);
static const TInputTag<TDomainToQueryCount> DomainToQueryCountInputTag(9);
static const TInputTag<TKeyHash> KeyHashInputTag(10);
static const TInputTag<TClusterInfo> ClusterInfoInputTag(11);
static const TInputTag<TRivalsWithMetric> RivalsWithMetricInputTag(12);
static const TInputTag<TPreparedQueriesWithClusterInfo> PreparedQueriesWithClusterInfoInputTag(13);
static const TInputTag<TNicheHostQuery> NicheHostQueryInputTag(14);
static const TInputTag<TNicheDomainClusterCount> NicheDomainClusterCountInputTag(15);
static const TInputTag<TNicheHostQuerySampled0> NicheHostQuerySampled0InputTag(16);
static const TInputTag<TNicheHostQuerySampled1> NicheHostQuerySampled1InputTag(17);
static const TInputTag<TNicheHostQuerySampled2> NicheHostQuerySampled2InputTag(18);
static const TInputTag<TNicheHostQuery> DomainsQueriesInputTag(19);

// Output table tags. Input and output ids are independent numbering spaces: the same number may
// denote different row types on the input and output side. Ids 10 and 11 are not used here.
static const TOutputTag<TPreparedQueries> PreparedQueriesOutputTag(1);
static const TOutputTag<TIntermediateQueries> IntermediateOutputTag(2);
static const TOutputTag<TQueryToUId> QueryToUIdOutputTag(3);
static const TOutputTag<TQueryToUIdCount> QueryToUIdCountOutputTag(4);
static const TOutputTag<TDomainToQuery> DomainToQueryOutputTag(5);
static const TOutputTag<TDomainToQueryCount> DomainToQueryCountOutputTag(6);
// Three distinct outputs carrying the same TKeyHash (key text -> hash) row type.
static const TOutputTag<TKeyHash> QueryHashOutputTag(7);
static const TOutputTag<TKeyHash> HostHashOutputTag(8);
static const TOutputTag<TKeyHash> KeyHashOutputTag(9);
static const TOutputTag<TQueryToCluster> QueryToClusterOutputTag(12);
static const TOutputTag<TPreparedQueriesWithClusterInfo> PreparedQueriesWithClusterInfoOutputTag(13);
static const TOutputTag<TNicheHostQuery> NicheHostQueryOutputTag(14);
static const TOutputTag<TNicheHostQuerySampled0> NicheHostQuerySampled0OutputTag(15);
static const TOutputTag<TNicheHostQuerySampled1> NicheHostQuerySampled1OutputTag(16);
static const TOutputTag<TNicheHostQuerySampled2> NicheHostQuerySampled2OutputTag(17);
static const TOutputTag<TNicheHostQuerySampled> NicheHostQuerySampledOutputTag(18);
static const TOutputTag<TNicheHostQuery> DomainsQueriesOutputTag(19);

// Returns the path of the per-date niche table: `date` joined onto `nichePath`
// with NYTUtils::JoinPath.
static TString CreateNicheTableNameByDate(const TString &nichePath, const TString &date) {
    TString tablePath = NYTUtils::JoinPath(nichePath, date);
    return tablePath;
}

double GetDistance(const TVector<float> &centroid, const TVector<float> &current) {
    double res = 0;
    auto sz = centroid.size();
    if (sz == 0 || current.size() != sz) {
        res = -1;
    } else {
        for (size_t i = 0; i < sz; ++i) {
            double diff = centroid[i] - current[i];
            res += diff * diff;
        }
    }
    return res;
}

// Shared implementation of AddQueryAssociation / AddHostAssociation: hashes `key` with
// FnvHash<int64_t>, writes the (key, hash) pair as a TKeyHash row to the output selected by
// `tag`, and returns the hash.
static int64_t AddKeyHashAssociation(const TString& key, TTagedWriter &writer, const TOutputTag<TKeyHash> &tag) {
    const int64_t hash = FnvHash<int64_t>(key);
    TKeyHash association;
    association.SetKey(key);
    association.SetHash(hash);
    writer.AddRow(association, tag);
    return hash;
}

// Records the text -> hash association for a query string in the QueryHashOutputTag output
// and returns the hash of `key`.
static int64_t AddQueryAssociation(const TString& key, TTagedWriter &writer) {
    return AddKeyHashAssociation(key, writer, QueryHashOutputTag);
}

// Records the text -> hash association for a host/domain string in the HostHashOutputTag
// output and returns the hash of `key`.
static int64_t AddHostAssociation(const TString& key, TTagedWriter &writer) {
    return AddKeyHashAssociation(key, writer, HostHashOutputTag);
}

// Builds the tokenizer configuration used for query normalization: lowercasing and
// lemmatizing are enabled, separators are chosen "by sense", and only word and number
// tokens are kept.
NTextProcessing::NTokenizer::TTokenizerOptions CreateTokenizerOptions() {
    namespace NTok = NTextProcessing::NTokenizer;

    NTok::TTokenizerOptions options;
    options.Lowercasing = true;
    options.Lemmatizing = true;
    options.SeparatorType = NTok::ESeparatorType::BySense;
    options.TokenTypes = {NTok::ETokenType::Word, NTok::ETokenType::Number};
    return options;
}

// Reduces a host URL to a domain string by applying, in this order, CutSchemePrefix,
// CutMPrefix and CutWWWPrefix, and copying the result into a TString.
static TString HostUrlToDomain(TString hostUrl) {
    const auto withoutScheme = CutSchemePrefix(hostUrl);
    const auto withoutMobilePrefix = CutMPrefix(withoutScheme);
    const auto withoutWww = CutWWWPrefix(withoutMobilePrefix);
    return TString(withoutWww);
}

struct TNormalizeOriginalQueriesMapper: public TTaggedMapper {
public:
    TNormalizeOriginalQueriesMapper():
        Tokenizer(CreateTokenizerOptions()) {
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        while (reader.IsValid()) {
            DoByTag(reader, writer, UserSessionInputTag);
            DoByTag(reader, writer, PrsInputTag);
        }
    }
private:
    void DoByTag(TTagedReader reader, TTagedWriter writer, TInputTag<NUserSessions::NProto::TQuery> inputTag) {
        for (auto &row: reader.GetRows(inputTag)) {
            TIntermediateQueries res;
            bool currentIsUserSession = reader.IsCurrentTable(UserSessionInputTag);
            // выкидываем не utf запросы [ошибки парсера]
            if (!IsUtf(row.GetCorrectedQuery()) || !IsUtf(row.GetQuery())) {
                continue;
            }
            // выкидываем мусор с мисскликами
            if (currentIsUserSession && !NUserSessions::IsVisibleQueryInWebmaster(row)) {
                continue;
            }
            if (!currentIsUserSession && !NUserSessions::IsVisiblePrsLogQuery(row)) {
                continue;
            }
            // выкидываем яндекс
            if (row.GetHost().Contains("yandex.ru")) {
                continue;
            }
            // выкидываем навигационные запросы
            if (row.GetUpperQueryNavPred() > 0.5) {
                continue;
            }

            int segment;
            bool isPornoQuery = row.GetUpperPornoUpperPl() == 100;
            TString domain = HostUrlToDomain(row.GetHost());
            auto normalizedQuery = NormalizeQuery(row.GetCorrectedQuery());

            if (currentIsUserSession) {
                if (row.GetPosition() < FIRST_SEGMENT_POSITION_UPPER_BOUND){
                    segment = E_USER_SESSION_GOOD;
                } else {
                    segment = E_USER_SESSION_BAD;
                }
            } else {
                segment = E_PRS_LOGS;
            }

            res.SetIsPornoQuery(isPornoQuery);
            res.SetCorrectedQuery(AddQueryAssociation(row.GetCorrectedQuery(), writer));
            res.SetNormalizedQuery(AddQueryAssociation(normalizedQuery, writer));
            res.SetDomain(AddHostAssociation(domain, writer));
            res.SetPosition(row.GetPosition());
            res.SetShows(row.GetShows());
            res.SetClicks(row.GetClicks());
            res.SetDwellTimeOnService(row.GetDwellTimeOnService());
            res.SetReqId(FnvHash<int64_t>(row.GetReqID()));
            res.SetUId(FnvHash<int64_t>(row.GetUID()));
            res.SetSegment(segment);
            res.SetCorrectedQueryText(row.GetCorrectedQuery());

            if (normalizedQuery != "" && row.GetCorrectedQuery() != "" && domain != "" && !isPornoQuery) {
                TQueryToUId q2uid;
                q2uid.SetNormalizedQuery(res.GetNormalizedQuery());
                q2uid.SetUId(res.GetUId());
                writer.AddRow(res, IntermediateOutputTag);
                writer.AddRow(q2uid, QueryToUIdOutputTag);
            }
        }
    }

    TString NormalizeQuery(TString query) {
        TVector<TString> tokens;
        for (TString x: Tokenizer.Tokenize(query)) {
            if (!PREPOSITIONS.contains(x)) {
                tokens.push_back(x);
            }
        }
        Sort(tokens);
        return JoinStrings(tokens.begin(), tokens.end(), " ");
    }

    NTextProcessing::NTokenizer::TTokenizer Tokenizer;
};
REGISTER_MAPPER(TNormalizeOriginalQueriesMapper)

// NormalizedQuery
struct TRemoveTailReducer: public TTaggedReducer {
public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        THashSet<int64_t> uids;
        int64_t normalizedQuery;
        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(QueryToUIdInputTag)) {
                normalizedQuery = row.GetNormalizedQuery();
                uids.insert(row.GetUId());
            }
        }
        if (uids.size() >= QUERY_TO_UIDS_LOWER_BOUND) {
            TQueryToUIdCount res;
            res.SetNormalizedQuery(normalizedQuery);
            res.SetCount(uids.size());
            writer.AddRow(res, QueryToUIdCountOutputTag);
        }
    }
};
REGISTER_REDUCER(TRemoveTailReducer)

struct TIntersectIntermediateTableMapper: public TTaggedMapper {
public:
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, GoodQueries);
        TTaggedMapper::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, GoodQueries);
        TTaggedMapper::Load(stream);
    }

    TIntersectIntermediateTableMapper(const THashSet<int64_t> &goodQueries): GoodQueries(goodQueries) {}

    TIntersectIntermediateTableMapper() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(IntermediateInputTag)) {
                if (GoodQueries.contains(row.GetNormalizedQuery())) {
                    TDomainToQuery domain2query;
                    writer.AddRow(row, IntermediateOutputTag);
                    domain2query.SetDomain(row.GetDomain());
                    domain2query.SetCorrectedQuery(row.GetCorrectedQuery());
                    writer.AddRow(domain2query, DomainToQueryOutputTag);
                }
            }
        }
    }
private:
    THashSet<int64_t> GoodQueries;
};
REGISTER_MAPPER(TIntersectIntermediateTableMapper)

// Domain
struct TRemoveTailDomainToQueryReducer: public TTaggedReducer {
public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        THashSet<int64_t> queries;
        int64_t domain;
        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(DomainToQueryInputTag)) {
                domain = row.GetDomain();
                queries.insert(row.GetCorrectedQuery());
            }
        }
        if (queries.size() >= DOMAIN_TO_QUERY_LOWER_BOUND) {
            TDomainToQueryCount res;
            res.SetDomain(domain);
            res.SetCount(queries.size());
            writer.AddRow(res, DomainToQueryCountOutputTag);
        }
    }
};
REGISTER_REDUCER(TRemoveTailDomainToQueryReducer)

// Mapper that forwards only the intermediate rows whose Domain hash is in GoodDomains.
struct TIntersectDomainsMapper: public TTaggedMapper {
public:
    TIntersectDomainsMapper() = default;

    TIntersectDomainsMapper(const THashSet<int64_t> &goodDomains)
        : GoodDomains(goodDomains)
    {
    }

    // Own state is written first, then the base-class state; Load mirrors that order.
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, GoodDomains);
        TTaggedMapper::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, GoodDomains);
        TTaggedMapper::Load(stream);
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(IntermediateInputTag)) {
                if (!GoodDomains.contains(row.GetDomain())) {
                    continue;
                }
                writer.AddRow(row, IntermediateOutputTag);
            }
        }
    }
private:
    // Hashes of the domains that are allowed through.
    THashSet<int64_t> GoodDomains;
};
REGISTER_MAPPER(TIntersectDomainsMapper)

// TopCorrectedQuery
struct TInnerJoinQueriesWithClusters: public TTaggedReducer {
public:
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        int64_t searchshareClusterId = -1;
        // int64_t spikesClusterId = -1;
        while (reader.IsValid()) {
            auto mbQueryCluster = reader.GetRowMaybe(SearchshareQueryToClusterInputTag);
            if (mbQueryCluster.Defined()) {
                const auto &queryToCluster = mbQueryCluster.GetRef();
                searchshareClusterId = queryToCluster.GetClusterId();
            }
            reader.SkipRows(SearchshareQueryToClusterInputTag);

            // mbQueryCluster = reader.GetRowMaybe(SpikesQueryToClusterInputTag);
            // if (mbQueryCluster.Defined()) {
            //     const auto &queryToCluster = mbQueryCluster.GetRef();
            //     spikesClusterId = queryToCluster.GetClusterId();
            // }
            // reader.SkipRows(SpikesQueryToClusterInputTag);
            for (auto& row: reader.GetRows(PreparedQueriesInputTag)) {
                auto res = row;
                res.SetSearchshareCluster(searchshareClusterId);
                // res.SetSpikesCluster(spikesClusterId);
                writer.AddRow(res, PreparedQueriesOutputTag);
            }
        }

    }
};
REGISTER_REDUCER(TInnerJoinQueriesWithClusters)

// Domain, NormalizedQuery, Segment
struct TCalculateAllReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        TPreparedQueries res;

        res.SetHasInfDwellTime(false);
        int64_t total = 0;
        int64_t totalShows = 0;
        THashSet<int64_t> reqIds;
        int64_t totalPornQueries = 0;
        int64_t totalDwellTimeOnService = 0;
        int64_t totalPosition = 0;
        int64_t totalClicks = 0;

        int64_t bestCorrectedQuery;
        uint64_t bestPosition = LONG_LONG_MAX;
        uint64_t bestShows = 0;
        TString bestCorrectedQueryText;

        TIntermediateQueries lastRow;

        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(IntermediateInputTag)) {
                lastRow = row;
                res.SetDomain(row.GetDomain());
                res.SetNormalizedQuery(row.GetNormalizedQuery());
                res.SetSegment(row.GetSegment());
                total += 1;
                totalShows += row.GetShows();
                totalClicks += row.GetClicks();
                reqIds.insert(row.GetReqId());
                totalPornQueries += row.GetIsPornoQuery();
                if (row.GetDwellTimeOnService() >= INFINITE_DWELL_TIME_ON_SERVICE) {
                    res.SetHasInfDwellTime(true);
                } else {
                    totalDwellTimeOnService += row.GetDwellTimeOnService();
                }
                totalPosition += row.GetShows() * row.GetPosition();
                if (row.GetPosition() == bestPosition && row.GetShows() > bestShows) {
                    bestShows = row.GetShows();
                    bestCorrectedQuery = row.GetCorrectedQuery();
                    bestCorrectedQueryText = row.GetCorrectedQueryText();
                }
                if (row.GetPosition() < bestPosition) {
                    bestPosition = row.GetPosition();
                    bestShows = row.GetShows();
                    bestCorrectedQuery = row.GetCorrectedQuery();
                    bestCorrectedQueryText = row.GetCorrectedQueryText();
                }
            }
        }
        res.SetTopCorrectedQueryText(bestCorrectedQueryText);
        res.SetTopCorrectedQuery(bestCorrectedQuery);
        res.SetTotal(total);
        res.SetReqIdCount(reqIds.size());
        res.SetPornQueriesPercent(totalPornQueries * 1.L / total);
        res.SetAverageDwellTimeOnService(totalDwellTimeOnService * 1.L / total);
        res.SetPosition(totalPosition * 1.L / totalShows);
        res.SetClicks(totalClicks);
        res.SetShows(totalShows);
        res.SetSearchshareCluster(-1);
        res.SetSpikesCluster(-1);
        writer.AddRow(res, PreparedQueriesOutputTag);
    }
};
REGISTER_REDUCER(TCalculateAllReducer)

// Original note: Key (presumably the reduce key — confirm against the operation spec).
// Deduplicates TKeyHash rows: only the first row of the group is written, the rest are ignored.
struct TMakeKeyHashUniqueByKeyReducer : public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        while (reader.IsValid()) {
            for (auto &firstRow: reader.GetRows(KeyHashInputTag)) {
                // Emit a single representative and stop processing the group.
                writer.AddRow(firstRow, KeyHashOutputTag);
                return;
            }
        }
    }
};
REGISTER_REDUCER(TMakeKeyHashUniqueByKeyReducer)

struct TProcessQueriesMapper: public TTaggedMapper {
public:
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, ClusterInfo);
        TTaggedMapper::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, ClusterInfo);
        TTaggedMapper::Load(stream);
    }

    TProcessQueriesMapper() = default;

    TProcessQueriesMapper(const THashMap<int64_t, std::pair<TString, TVector<float>>> &clusterInfo):
        ClusterInfo(clusterInfo) { }

    void StartTagged(TTagedWriter) override final {
        TDssmL3Model::Ptr dssmL3Model(new TDssmL3Model);
        DssmCtr.Reset(new TDssmCtr(dssmL3Model));
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        TPreparedQueriesWithClusterInfo dstMsg;
        TVector<float> centroid;
        bool firstRead = true;
        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(PreparedQueriesInputTag)) {
                TVector<float> dssm;
                DssmCtr->GetQueryEmbedding(row.GetTopCorrectedQueryText(), dssm);
                if (firstRead) {
                    auto currentClusterInfo = ClusterInfo[row.GetSearchshareCluster()];
                    dstMsg.SetClusterName(currentClusterInfo.first);
                    for (auto &x: currentClusterInfo.second) {
                        centroid.push_back(x);
                    }
                    firstRead = false;
                }
                dstMsg.SetSearchshareDist(GetDistance(centroid, dssm));
                dstMsg.SetDomain(row.GetDomain());
                dstMsg.SetNormalizedQuery(row.GetNormalizedQuery());
                dstMsg.SetTopCorrectedQuery(row.GetTopCorrectedQuery());
                dstMsg.SetPosition(row.GetPosition());
                dstMsg.SetTotal(row.GetTotal());
                dstMsg.SetSegment(row.GetSegment());
                dstMsg.SetSpikesCluster(row.GetSpikesCluster());
                dstMsg.SetPornQueriesPercent(row.GetPornQueriesPercent());
                dstMsg.SetReqIdCount(row.GetReqIdCount());
                dstMsg.SetAverageDwellTimeOnService(row.GetAverageDwellTimeOnService());
                dstMsg.SetShows(row.GetShows());
                dstMsg.SetClicks(row.GetClicks());
                dstMsg.SetHasInfDwellTime(row.GetHasInfDwellTime());
                dstMsg.SetTopCorrectedQueryText(row.GetTopCorrectedQueryText());
                writer.AddRow(dstMsg, PreparedQueriesWithClusterInfoOutputTag);
            }
        }
    }
private:
    THolder<TDssmCtr> DssmCtr;
    THashMap<int64_t, std::pair<TString, TVector<float>>> ClusterInfo;
};
REGISTER_MAPPER(TProcessQueriesMapper)

// Original note: _Domain (presumably the reduce key — confirm against the operation spec).
// Builds "potential" niche rows: first collects all rival rows of the group, then for every
// prepared-query row emits one TNicheHostQuery row per collected rival. The output row takes
// its Domain and the rival-metric flags from the rival row and everything else from the
// query row; the segment is always E_POTENCIAL.
//
// The per-row debug output that used to be written to Cerr has been removed.
struct TBuildNicheReducer : public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TDeque<TRivalsWithMetric> rivals;
        // -228 is a sentinel meaning "no rival row seen"; otherwise the Rival value of the last
        // rival row read. (The original code had a commented-out Y_ASSERT(initDomain != -228).)
        int64_t initDomain = -228;
        for (auto &row: reader.GetRows(RivalsWithMetricInputTag)) {
            rivals.push_back(row);
            initDomain = row.GetRival();
        }

        TNicheHostQuery res;
        for (auto &row: reader.GetRows(PreparedQueriesWithClusterInfoInputTag)) {
            // Fields that depend only on the query row are filled once per row...
            res.SetNormalizedQuery(row.GetNormalizedQuery());
            res.SetTopCorrectedQuery(row.GetTopCorrectedQuery());
            res.SetPosition(row.GetPosition());
            res.SetTotal(row.GetTotal());
            res.SetSegment(E_POTENCIAL);
            res.SetSpikesCluster(row.GetSpikesCluster());
            res.SetSearchshareCluster(row.GetSearchshareCluster());
            res.SetPornQueriesPercent(row.GetPornQueriesPercent());
            res.SetReqIdCount(row.GetReqIdCount());
            res.SetAverageDwellTimeOnService(row.GetAverageDwellTimeOnService());
            res.SetShows(row.GetShows());
            res.SetClicks(row.GetClicks());
            res.SetHasInfDwellTime(row.GetHasInfDwellTime());
            res.SetClusterName(row.GetClusterName());
            res.SetSearchshareDist(row.GetSearchshareDist());
            res.SetTopCorrectedQueryText(row.GetTopCorrectedQueryText());
            res.SetInitDomain(initDomain);

            // ...and the rival-dependent ones once per (row, rival) pair.
            for (const auto &rival: rivals) {
                res.SetDomain(rival.GetDomain());
                // The rival metrics are stored as 0/1 presence flags, not as counts.
                res.SetUserAddCount(rival.GetUserAddCount() > 0);
                res.SetSpylogCount(rival.GetSpylogCount() > 0);
                res.SetSimilarGroupCount(rival.GetSimilarGroupCount() > 0);
                writer.AddRow(res, NicheHostQueryOutputTag);
            }
        }
    }
};
REGISTER_REDUCER(TBuildNicheReducer)

// Original note: Domain, NormalizedQuery (presumably the reduce key — confirm against the
// operation spec).
// Splits niche rows by whether a prepared-query row (with cluster info) was seen before them:
// if none was seen they go to NicheHostQueryOutputTag, otherwise to DomainsQueriesOutputTag.
struct TLeaveOnlyThirdSegmentReducer : public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        while (reader.IsValid()) {
            // Reset on every pass of the outer loop, exactly as the flag was before.
            bool seenInSearch = false;
            for (auto &searchRow: reader.GetRows(PreparedQueriesWithClusterInfoInputTag)) {
                Y_UNUSED(searchRow);
                seenInSearch = true;
            }
            for (auto &nicheRow: reader.GetRows(NicheHostQueryInputTag)) {
                if (seenInSearch) {
                    writer.AddRow(nicheRow, DomainsQueriesOutputTag);
                    continue;
                }
                writer.AddRow(nicheRow, NicheHostQueryOutputTag);
            }
        }
    }
};
REGISTER_REDUCER(TLeaveOnlyThirdSegmentReducer)

// Original note: Domain, NormalizedQuery (presumably the reduce key — confirm against the
// operation spec).
// Collapses a group of TNicheHostQuery rows into one row:
//   * identity fields (see SetUpDstMsg) come from the row with the largest ReqIdCount, ties
//     broken by the larger Shows;
//   * SearchshareDist is the mean over rows whose distance is not -1, or -1 if there are none;
//   * Shows is the integer mean over all rows;
//   * SimilarGroupCount / SpylogCount / UserAddCount are sums over all rows.
class TProcessThirdSegmentReducer: public NYT::IReducer<NYT::TTableReader<TNicheHostQuery>, NYT::TTableWriter<TNicheHostQuery>> {
public:
    void Do(TReader* reader, TWriter* writer) override {
        TNicheHostQuery dstMsg;
        int64_t reqIdCount{-1};
        uint64_t showsCount{0};

        double distSum{0};
        int64_t classifiedCnt{0};
        uint64_t showsSum{0};
        int64_t similarGroupSum{0};
        int64_t spylogSum{0};
        int64_t userAddSum{0};
        int64_t cnt{0};

        while (reader->IsValid()) {
            auto &row = reader->GetRow();

            // -1 marks rows without a computed distance; they do not take part in the mean.
            if (row.GetSearchshareDist() != -1) {
                distSum += row.GetSearchshareDist();
                classifiedCnt += 1;
            }
            showsSum += row.GetShows();
            similarGroupSum += row.GetSimilarGroupCount();
            spylogSum += row.GetSpylogCount();
            userAddSum += row.GetUserAddCount();

            if (reqIdCount < row.GetReqIdCount()) {
                reqIdCount = row.GetReqIdCount();
                showsCount = row.GetShows();
                SetUpDstMsg(dstMsg, row);
            } else if (reqIdCount == row.GetReqIdCount() && showsCount < row.GetShows()) {
                showsCount = row.GetShows();
                SetUpDstMsg(dstMsg, row);
            }

            reader->Next();
            cnt += 1;
        }

        if (cnt == 0) {
            return;
        }

        const double meanDist = classifiedCnt == 0 ? -1 : distSum / classifiedCnt;
        dstMsg.SetSearchshareDist(meanDist);
        dstMsg.SetShows(showsSum / cnt);
        dstMsg.SetSimilarGroupCount(similarGroupSum);
        dstMsg.SetSpylogCount(spylogSum);
        dstMsg.SetUserAddCount(userAddSum);
        writer->AddRow(dstMsg);
    }
private:
    // Copies the identifying fields of `row` (top corrected query, domain, normalized query,
    // cluster name) into `dstMsg`.
    void SetUpDstMsg(TNicheHostQuery &dstMsg, const TNicheHostQuery &row) {
        dstMsg.SetDomain(row.GetDomain());
        dstMsg.SetNormalizedQuery(row.GetNormalizedQuery());
        dstMsg.SetTopCorrectedQuery(row.GetTopCorrectedQuery());
        dstMsg.SetClusterName(row.GetClusterName());
    }
};
REGISTER_REDUCER(TProcessThirdSegmentReducer);

// Original note: Domain (presumably the reduce key — confirm against the operation spec).
// Counts the rows of a group and emits one TNicheDomainClusterCount row with that count.
// Domain and ClusterName are taken from the rows with index 0..2 (the last of them wins).
class TCountQueriesClusteredReducer: public NYT::IReducer<NYT::TTableReader<TNicheHostQuery>, NYT::TTableWriter<TNicheDomainClusterCount>> {
public:
    void Do(TReader* reader, TWriter* writer) override {
        TNicheDomainClusterCount dstMsg;
        int64_t rowCount = 0;
        while (reader->IsValid()) {
            auto &row = reader->GetRow();
            if (rowCount <= 2) {
                dstMsg.SetDomain(row.GetDomain());
                dstMsg.SetClusterName(row.GetClusterName());
            }
            reader->Next();
            ++rowCount;
        }

        if (rowCount <= 0) {
            return;
        }
        dstMsg.SetCount(rowCount);
        writer->AddRow(dstMsg);
    }
};
REGISTER_REDUCER(TCountQueriesClusteredReducer);

// Original note: Domain (presumably the reduce key — confirm against the operation spec).
// Counts the rows of a group and emits one TNicheDomainClusterCount row with that count.
// Domain is taken from the rows with index 0..2 (the last of them wins); ClusterName is
// left unset.
class TCountQueriesReducer: public NYT::IReducer<NYT::TTableReader<TNicheHostQuery>, NYT::TTableWriter<TNicheDomainClusterCount>> {
public:
    void Do(TReader* reader, TWriter* writer) override {
        TNicheDomainClusterCount dstMsg;
        int64_t rowCount = 0;
        while (reader->IsValid()) {
            auto &row = reader->GetRow();
            if (rowCount <= 2) {
                dstMsg.SetDomain(row.GetDomain());
            }
            reader->Next();
            ++rowCount;
        }

        if (rowCount <= 0) {
            return;
        }
        dstMsg.SetCount(rowCount);
        writer->AddRow(dstMsg);
    }
};
REGISTER_REDUCER(TCountQueriesReducer);

// Plain aggregate bundling a weight with the inputs it is associated with.
// Members have no default initializers, so a default-constructed instance holds
// indeterminate values until every field is assigned.
// NOTE(review): the code that fills these fields is outside the visible part of the file;
// the per-field meanings below follow the field names and the same-named TNicheHostQuery
// fields — confirm at the use site.
struct TWeightDebugInfo {
    // The only field read in the visible code: TNicheHostQueryWithWeight::GetProtobuf
    // writes it out as the row's Weight.
    float Weight;
    double SearchshareDist;
    int64_t SpylogCount;
    int64_t SimilarGroupCount;
    int64_t UserAddCount;
    uint64_t Shows;
    double TopCorrectedQueryTextDist;
};

// A TNicheHostQuery row reduced to the fields needed for sampling, paired
// with an ordering key. Instances compare by Weight only.
struct TNicheHostQueryWithWeight {
    float Weight;                // ordering key used by operator<
    TWeightDebugInfo RealWeight; // weight breakdown; RealWeight.Weight is what gets written out
    int64_t Domain;
    int64_t NormalizedQuery;
    int64_t TopCorrectedQuery;
    TString ClusterName;

    // Copies the identifying fields out of `row` and remembers both the
    // ordering key (`weight`) and the weight details (`realWeight`).
    TNicheHostQueryWithWeight(float weight, TWeightDebugInfo realWeight, const TNicheHostQuery &row)
        : Weight(weight)
        , RealWeight(realWeight)
        , Domain(row.GetDomain())
        , NormalizedQuery(row.GetNormalizedQuery())
        , TopCorrectedQuery(row.GetTopCorrectedQuery())
        , ClusterName(row.GetClusterName())
    {
    }

    // Strict ordering by the key alone; all other fields are ignored.
    bool operator<(const TNicheHostQueryWithWeight &other) const {
        return other.Weight > Weight;
    }

    // Builds the output message. Note that the exported weight is
    // RealWeight.Weight, not the ordering key.
    TNicheHostQuerySampled0 GetProtobuf() const {
        TNicheHostQuerySampled0 message;
        message.SetDomain(Domain);
        message.SetNormalizedQuery(NormalizedQuery);
        message.SetTopCorrectedQuery(TopCorrectedQuery);
        message.SetClusterName(ClusterName);
        message.SetWeight(RealWeight.Weight);
        return message;
    }
};

// Domain, ClusterName
struct TSampleNicheReducer: public TTaggedReducer {
    void StartTagged(TTagedWriter) override final {
        TDssmL3Model::Ptr dssmL3Model(new TDssmL3Model);
        DssmCtr.Reset(new TDssmCtr(dssmL3Model));
    }

    void Save(IOutputStream& stream) const override {
        ::Save(&stream, DomainToQueryCount);
        ::Save(&stream, WembasterHosts);
        TTaggedReducer::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, DomainToQueryCount);
        ::Load(&stream, WembasterHosts);
        TTaggedReducer::Load(stream);
    }

    TSampleNicheReducer() = default;

    TSampleNicheReducer(const THashMap<int64_t, int64_t> &domainToQueryCount,
                        const THashSet<int64_t> &webmasterHosts):
                            DomainToQueryCount(domainToQueryCount),
                            WembasterHosts(webmasterHosts) { }

    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        while (reader.IsValid()) {
            uint64_t totalCount{0};
            uint64_t clusterCount{0};
            int64_t dssmCount = 0;
            for (auto &row: reader.GetRows(NicheDomainClusterCountInputTag)) {
                totalCount = DomainToQueryCount[row.GetDomain()];
                clusterCount = row.GetCount();
                if (UniversalCluster(row.GetClusterName()) ||
                        PornCluster(row.GetClusterName()) ||
                        !WembasterHosts.contains(row.GetDomain())) {
                    totalCount = 0;
                }
            }
            if (totalCount == 0) {
                break;
            }
            for (auto &row: reader.GetRows(DomainsQueriesInputTag)) {
                TVector<float> dssm;
                DssmCtr->GetQueryEmbedding(row.GetTopCorrectedQueryText(), dssm);
                if (dssm.empty()) {
                    continue;
                }
                dssmCount += 1;
                if (Centroid.size() < dssm.size()) {
                    Centroid = dssm;
                } else {
                    for (size_t i = 0; i < dssm.size(); ++i) {
                        Centroid[i] += dssm[i];
                    }
                }
            }
            if (dssmCount) {
                for (auto &x: Centroid) {
                    x /= dssmCount;
                }
            }
            uint64_t count = RoundUpDivision(clusterCount * THIRD_SEGMENT_QUIERIES_AFTER_SAMPLING_MAX_COUNT, totalCount);
            ReservoirSample(reader, writer, count);
        }
    }
private:
    bool PornCluster(const TString &clusterName) {
        return clusterName.Contains("порн") ||
            clusterName.Contains("эро") ||
            clusterName.Contains("18") ||
            clusterName.Contains("хентай") ||
            clusterName.Contains("контент для взрослых");
    }

    bool UniversalCluster(const TString &clusterName) {
        static THashSet<TString> universalClusters{"", "N", "все вперемешку"};
        return universalClusters.contains(clusterName);
    }

// https://en.wikipedia.org/wiki/Reservoir_sampling#Algorithm_A-Res
    void ReservoirSample(TTagedReader reader, TTagedWriter writer, uint64_t k) {
        TSet<TNicheHostQueryWithWeight> h;
        for (auto &row: reader.GetRows(NicheHostQueryInputTag)) {
            auto weight = GetWeight(row);
            if (weight.Weight < 0.01) {
                weight.Weight = 0.01;
            }
            float r = 0;
            while (r == 0 || r == 1) {
                r = RandomNumber<float>();
            }
            r = std::pow(r, 1 / weight.Weight);
            if (h.size() < k) {
                h.insert(TNicheHostQueryWithWeight(r, weight, row));
            } else {
                if (r > h.begin()->Weight) {
                    h.erase(h.begin());
                    h.insert(TNicheHostQueryWithWeight(r, weight, row));
                }
            }
        }
        for (auto &x: h) {
            writer.AddRow(x.GetProtobuf(), NicheHostQuerySampled0OutputTag);
        }
    }

    TWeightDebugInfo GetWeight(const TNicheHostQuery &row) {
        float res = 0;
        if (row.GetSearchshareDist() >= 0) {
            res += 1 - Min<float>(row.GetSearchshareDist(), 1);
        }
        if (row.GetSpylogCount() >= 20) {
            res += 1;
        } else {
            res += 1 * row.GetSpylogCount() / 20.f;
        }
        if (row.GetSimilarGroupCount() >= 20) {
            res += 1;
        } else {
            res += 1 * row.GetSimilarGroupCount() / 20.f;
        }
        if (row.GetUserAddCount() >= 3) {
            res += 1;
        } else {
            res += 1 * row.GetUserAddCount() / 3.f;
        }
        if (row.GetShows() >= 50) {
            res += 1;
        } else {
            res += 1 * row.GetShows() / 50.f;
        }
        TVector<float> dssm;
        DssmCtr->GetQueryEmbedding(row.GetTopCorrectedQueryText(), dssm);
        auto dist = GetDistance(dssm, Centroid);
        if (dist >= 0) {
            res += 2 - 2 * Min<float>(dist, 5) / 5;
        }
        return {res, row.GetSearchshareDist(), row.GetSpylogCount(), row.GetSimilarGroupCount(), row.GetUserAddCount(), row.GetShows(), dist};
    }

    uint64_t RoundUpDivision(uint64_t num, uint64_t denum) {
        return (num + denum - 1) / denum;
    }

    THashMap<int64_t, int64_t> DomainToQueryCount;
    THashSet<int64_t> WembasterHosts;
    TVector<float> Centroid;
    THolder<TDssmCtr> DssmCtr;
};
REGISTER_REDUCER(TSampleNicheReducer);

// Domain
// Emits one TNicheDomainClusterCount row {Domain, Count} per reduce group,
// where Count is the number of sampled rows in the group.
// TaskPrepareQueries runs this reducer with ReduceBy({"Domain"}).
class TCalculateDomainSizeReducer: public NYT::IReducer<NYT::TTableReader<TNicheHostQuerySampled0>, NYT::TTableWriter<TNicheDomainClusterCount>> {
public:
    void Do(TReader* reader, TWriter* writer) override {
        TNicheDomainClusterCount result;
        int64_t rowsSeen = 0;
        while (reader->IsValid()) {
            // Every row overwrites the domain, so the last row's value wins.
            const auto &current = reader->GetRow();
            result.SetDomain(current.GetDomain());
            reader->Next();
            ++rowsSeen;
        }
        result.SetCount(rowsSeen);
        // An empty group produces no output row.
        if (rowsSeen == 0) {
            return;
        }
        writer->AddRow(result);
    }
};
REGISTER_REDUCER(TCalculateDomainSizeReducer);

// Domain, ClusterName
// Emits one TNicheDomainClusterCount row {Domain, ClusterName, Count} per
// reduce group, where Count is the number of sampled rows in the group.
// TaskPrepareQueries runs this reducer with ReduceBy({"Domain", "ClusterName"}).
class TCountDomainClusterCountReducer: public NYT::IReducer<NYT::TTableReader<TNicheHostQuerySampled0>, NYT::TTableWriter<TNicheDomainClusterCount>> {
public:
    void Do(TReader* reader, TWriter* writer) override {
        TNicheDomainClusterCount result;
        int64_t rowsSeen = 0;
        while (reader->IsValid()) {
            // Every row overwrites the key fields, so the last row's values win.
            const auto &current = reader->GetRow();
            result.SetDomain(current.GetDomain());
            result.SetClusterName(current.GetClusterName());
            reader->Next();
            ++rowsSeen;
        }
        result.SetCount(rowsSeen);
        // An empty group produces no output row.
        if (rowsSeen == 0) {
            return;
        }
        writer->AddRow(result);
    }
};
REGISTER_REDUCER(TCountDomainClusterCountReducer);

// Domain, ClusterName
// Drops clusters that are too small relative to their domain: the sampled rows
// of a (Domain, ClusterName) group are passed through only when the group's
// count is at least 0.1% (factor 0.001) of DomainToCount[domain].
struct TSampleClusterPercentileReducer: public TTaggedReducer {
    TSampleClusterPercentileReducer() = default;

    // domainToCount: domain hash -> number of sampled rows of that domain.
    TSampleClusterPercentileReducer(const THashMap<int64_t, int64_t> &domainToCount)
        : DomainToCount(domainToCount)
    {
    }

    // Writes the lookup table first, then the base-class state.
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, DomainToCount);
        TTaggedReducer::Save(stream);
    }

    // Reads back in the same order as Save().
    void Load(IInputStream& stream) override {
        ::Load(&stream, DomainToCount);
        TTaggedReducer::Load(stream);
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        while (reader.IsValid()) {
            // Decide from the count rows whether this cluster is kept.
            bool keepCluster = false;
            for (auto &countRow: reader.GetRows(NicheDomainClusterCountInputTag)) {
                const auto threshold = DomainToCount[countRow.GetDomain()] * 0.001;
                if (countRow.GetCount() >= threshold) {
                    keepCluster = true;
                }
            }
            // The sampled rows are always consumed, but forwarded only for kept clusters.
            for (auto &sampledRow: reader.GetRows(NicheHostQuerySampled0InputTag)) {
                if (!keepCluster) {
                    continue;
                }
                writer.AddRow(sampledRow, NicheHostQuerySampled0OutputTag);
            }
        }
    }
private:
    THashMap<int64_t, int64_t> DomainToCount;
};
REGISTER_REDUCER(TSampleClusterPercentileReducer);

// Domain
struct TDecodeDomainReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TNicheHostQuerySampled1 dstMsg;
        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(KeyHashInputTag)) {
                dstMsg.SetDomain(row.GetKey());
            }
            for (auto &row: reader.GetRows(NicheHostQuerySampled0InputTag)) {
                if (dstMsg.GetDomain() != "") {
                    dstMsg.SetNormalizedQuery(row.GetNormalizedQuery());
                    dstMsg.SetTopCorrectedQuery(row.GetTopCorrectedQuery());
                    dstMsg.SetClusterName(row.GetClusterName());
                    dstMsg.SetWeight(row.GetWeight());
                    writer.AddRow(dstMsg, NicheHostQuerySampled1OutputTag);
                }
            }
        }
    }
};
REGISTER_REDUCER(TDecodeDomainReducer);

// NormalizedQuery
struct TDecodeNormalizedQueryReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TNicheHostQuerySampled2 dstMsg;
        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(KeyHashInputTag)) {
                dstMsg.SetNormalizedQuery(row.GetKey());
            }
            for (auto &row: reader.GetRows(NicheHostQuerySampled1InputTag)) {
                if (dstMsg.GetNormalizedQuery() != "") {
                    dstMsg.SetDomain(row.GetDomain());
                    dstMsg.SetTopCorrectedQuery(row.GetTopCorrectedQuery());
                    dstMsg.SetClusterName(row.GetClusterName());
                    dstMsg.SetWeight(row.GetWeight());
                    writer.AddRow(dstMsg, NicheHostQuerySampled2OutputTag);
                }
            }
        }
    }
};
REGISTER_REDUCER(TDecodeNormalizedQueryReducer);

// TopCorrectedQuery
struct TDecodeTopCorrectedQueryReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TNicheHostQuerySampled dstMsg;
        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(KeyHashInputTag)) {
                dstMsg.SetTopCorrectedQuery(row.GetKey());
            }
            for (auto &row: reader.GetRows(NicheHostQuerySampled2InputTag)) {
                if (dstMsg.GetTopCorrectedQuery() != "") {
                    dstMsg.SetDomain(row.GetDomain());
                    dstMsg.SetNormalizedQuery(row.GetNormalizedQuery());
                    dstMsg.SetClusterName(row.GetClusterName());
                    dstMsg.SetWeight(row.GetWeight());
                    writer.AddRow(dstMsg, NicheHostQuerySampledOutputTag);
                }
            }
        }
    }
};
REGISTER_REDUCER(TDecodeTopCorrectedQueryReducer);

int TaskPrepareQueries(int, const char **) {
    static const TString DATE_LOWER_BOUND = "2022-03-01";
    const auto& cfg = TConfig::CInstance();

    auto client = NYT::CreateClient(cfg.MR_SERVER_HOST);

    TDeque<NYTUtils::TTableInfo> tables;
    NYTUtils::GetTableList(client, cfg.TABLE_USER_SESSIONS_DAILY, tables);
    std::sort(tables.begin(), tables.end(), NYTUtils::TTableInfo::TNameGreater());

    TDeque<TString> dates;

    for (auto table : tables) {
        auto date = NYTUtils::GetTableName(table.Name);
        if (date < DATE_LOWER_BOUND) {
            break;
        }
        auto result = CreateNicheTableNameByDate(cfg.TABLE_NICHE_CALCULATED, date);
        if (!client->Exists(result) && client->Exists(NYTUtils::JoinPath(cfg.TABLE_PRS_LOG_DAILY, date))) {
            LOG_INFO("Want to deal with %s", date.c_str());
            dates.push_back(date);
            break;
        }
    }

    // dates = {"2022-02-22"};

    for (auto &date: dates) {
        LOG_INFO("Preparing queries from user sessions %s", date.c_str());
        auto tx = client->StartTransaction();
        TString userSessionsTableName = NYTUtils::JoinPath(cfg.TABLE_USER_SESSIONS_DAILY, date);
        TString prsLogTableName = NYTUtils::JoinPath(cfg.TABLE_PRS_LOG_DAILY, date);

        TString intermediateTableName = cfg.TABLE_INTERMEDIATE_TABLE;
        TString preparedTableName = NYTUtils::JoinPath(cfg.TABLE_PREPARED_QUERIES, date);

        TString queryHash = NYTUtils::JoinPath(cfg.TABLE_QUERY_HASHES, date);
        TString hostHash = NYTUtils::JoinPath(cfg.TABLE_HOST_HASHES, date);

        TString queryToUIdTableName = cfg.TABLE_QUERY_TO_UID;
        TString domainToQueryTableName = cfg.TABLE_DOMAIN_TO_QUERY;

        if (!tx->Exists(intermediateTableName)) {
            const auto opts = NYT::TCreateOptions().Recursive(true);
            tx->CreateTable<TIntermediateQueries>(intermediateTableName, NYT::TSortColumns(), opts);
        }
        if (!tx->Exists(queryToUIdTableName)) {
            const auto opts = NYT::TCreateOptions().Recursive(true);
            tx->CreateTable<TQueryToUId>(queryToUIdTableName, NYT::TSortColumns(), opts);
        }
        if (!tx->Exists(domainToQueryTableName)) {
            const auto opts = NYT::TCreateOptions().Recursive(true);
            tx->CreateTable<TDomainToQuery>(domainToQueryTableName, NYT::TSortColumns(), opts);
        }

        NComputeGraph::TJobRunner runner;

        auto normalizedQueries = runner.AddJob([&]() {
            LOG_INFO("map user_sessions and prs_logs to intermediate state");
            TMapCmd<TNormalizeOriginalQueriesMapper>(tx)
                .Input(TTable<NUserSessions::NProto::TQuery>(tx, userSessionsTableName), UserSessionInputTag)
                .Input(TTable<NUserSessions::NProto::TQuery>(tx, prsLogTableName), PrsInputTag)
                .Output(TTable<TIntermediateQueries>(tx, intermediateTableName), IntermediateOutputTag)
                .Output(TTable<TKeyHash>(tx, queryHash), QueryHashOutputTag)
                .Output(TTable<TKeyHash>(tx, hostHash), HostHashOutputTag)
                .Output(TTable<TQueryToUId>(tx, queryToUIdTableName), QueryToUIdOutputTag)
                .Do();
        });

        auto sortedQueryToUId = runner.AddJob([&]() {
            TSortCmd<TQueryToUId>(tx)
                .Input<TQueryToUId>(queryToUIdTableName)
                .Output<TQueryToUId>(queryToUIdTableName)
                .By({"NormalizedQuery"})
                .Do();
        }, { normalizedQueries });

        auto removedTail = runner.AddJob([&]() {
            TReduceCmd<TRemoveTailReducer>(tx)
                .Input(TTable<TQueryToUId>(tx, queryToUIdTableName), QueryToUIdInputTag)
                .Output(TTable<TQueryToUIdCount>(tx, queryToUIdTableName), QueryToUIdCountOutputTag)
                .ReduceBy({"NormalizedQuery"})
                .Do();
        }, { sortedQueryToUId });

        auto sortedQueryHash = runner.AddJob([&]() {
            TSortCmd<TKeyHash>(tx)
                .Input<TKeyHash>(queryHash)
                .Output<TKeyHash>(queryHash)
                .By({"Key"})
                .Do();
        }, { normalizedQueries });

        auto sortedHostHash = runner.AddJob([&]() {
            TSortCmd<TKeyHash>(tx)
                .Input<TKeyHash>(hostHash)
                .Output<TKeyHash>(hostHash)
                .By({"Key"})
                .Do();
        }, { normalizedQueries });

        THashSet<int64_t> goodQueries;

        auto readQueryToUIdCount = runner.AddJob([&]() {
            for (auto reader = tx->CreateTableReader<TQueryToUIdCount>(queryToUIdTableName);
                reader->IsValid();
                reader->Next()) {
                auto &row = reader->GetRow();
                goodQueries.insert(row.GetNormalizedQuery());
            }

            LOG_INFO("cnt of good queries: %lu", goodQueries.size());
        }, { removedTail });

        auto sortedIntermediateTable = runner.AddJob([&]() {
            TSortCmd<TIntermediateQueries>(tx)
                .Input<TIntermediateQueries>(intermediateTableName)
                .Output<TIntermediateQueries>(intermediateTableName)
                .By({"NormalizedQuery"})
                .Do();
        }, { normalizedQueries });

        auto intersectedIntermediateTable = runner.AddJob([&]() {
            TMapCmd<TIntersectIntermediateTableMapper>(tx, new TIntersectIntermediateTableMapper(goodQueries))
                .Input(TTable<TIntermediateQueries>(tx, intermediateTableName), IntermediateInputTag)
                .Output(TTable<TIntermediateQueries>(tx, intermediateTableName), IntermediateOutputTag)
                .Output(TTable<TDomainToQuery>(tx, domainToQueryTableName), DomainToQueryOutputTag)
                .Do();
        }, { readQueryToUIdCount, sortedIntermediateTable });

        auto uniqueQueryHash = runner.AddJob([&]() {
            TCombineReduceCmd<TMakeKeyHashUniqueByKeyReducer, TMakeKeyHashUniqueByKeyReducer>(tx) //параллельно стараемся уменьшить и сделать табличку с хешами готовой к обратной трансляции в читабельный вид
                .Input(TTable<TKeyHash>(tx, queryHash), KeyHashInputTag)
                .Output(TTable<TKeyHash>(tx, queryHash), KeyHashOutputTag)
                .IntermediateCombineInputTag(KeyHashInputTag)
                .IntermediateCombineOutputTag(KeyHashOutputTag)
                .IntermediateReduceTag(KeyHashInputTag)
                .ReduceBy({"Key"})
                .Do();
        }, { sortedQueryHash });

        auto uniqueHostHash = runner.AddJob([&]() {
            TCombineReduceCmd<TMakeKeyHashUniqueByKeyReducer, TMakeKeyHashUniqueByKeyReducer>(tx) //параллельно стараемся уменьшить и сделать табличку с хешами готовой к обратной трансляции в читабельный вид
                .Input(TTable<TKeyHash>(tx, hostHash), KeyHashInputTag)
                .Output(TTable<TKeyHash>(tx, hostHash), KeyHashOutputTag)
                .IntermediateCombineInputTag(KeyHashInputTag)
                .IntermediateCombineOutputTag(KeyHashOutputTag)
                .IntermediateReduceTag(KeyHashInputTag)
                .ReduceBy({"Key"})
                .Do();
        }, { sortedHostHash });

        auto sortedHostHashByHash = runner.AddJob([&]() {
            TSortCmd<TKeyHash>(tx)
                .Input<TKeyHash>(hostHash)
                .Output<TKeyHash>(hostHash)
                .By({"Hash"})
                .Do();
        }, { uniqueHostHash });

        auto sortedQueryHashByHash = runner.AddJob([&]() {
            TSortCmd<TKeyHash>(tx)
                .Input<TKeyHash>(queryHash)
                .Output<TKeyHash>(queryHash)
                .By({"Hash"})
                .Do();
        }, { uniqueQueryHash });

        auto sortedDomainToQuery = runner.AddJob([&]() {
            TSortCmd<TDomainToQuery>(tx)
                .Input<TDomainToQuery>(domainToQueryTableName)
                .Output<TDomainToQuery>(domainToQueryTableName)
                .By({"Domain"})
                .Do();
        }, { intersectedIntermediateTable });

        auto removedTailDomainToQuery = runner.AddJob([&]() {
            TReduceCmd<TRemoveTailDomainToQueryReducer>(tx)
                .Input(TTable<TDomainToQuery>(tx, domainToQueryTableName), DomainToQueryInputTag)
                .Output(TTable<TDomainToQueryCount>(tx, domainToQueryTableName), DomainToQueryCountOutputTag)
                .ReduceBy({"Domain"})
                .MemoryLimit(2_GBs)
                .Do();
        }, { sortedDomainToQuery });

        THashSet<int64_t> goodDomains;

        auto readDomainToQueryFiltered = runner.AddJob([&]() {
            for (auto reader = tx->CreateTableReader<TDomainToQueryCount>(domainToQueryTableName);
                    reader->IsValid();
                    reader->Next()) {
                auto &row = reader->GetRow();
                goodDomains.insert(row.GetDomain());
            }

            LOG_INFO("cnt of good domains: %lu", goodDomains.size());
        }, { removedTailDomainToQuery });

        auto intersectedDomains = runner.AddJob([&]() {
            TMapCmd<TIntersectDomainsMapper>(tx, new TIntersectDomainsMapper(goodDomains))
                .Input(TTable<TIntermediateQueries>(tx, intermediateTableName), IntermediateInputTag)
                .Output(TTable<TIntermediateQueries>(tx, intermediateTableName), IntermediateOutputTag)
                .Do();
        }, { readDomainToQueryFiltered, intersectedIntermediateTable });

        auto sortedIntermediateQueriesFiltered2 = runner.AddJob([&]() {
            TSortCmd<TIntermediateQueries>(tx)
                .Input<TIntermediateQueries>(intermediateTableName)
                .Output<TIntermediateQueries>(intermediateTableName)
                .By({"Domain", "NormalizedQuery", "Segment"})
                .Do();
            LOG_INFO("mapped and filtered");
        }, { intersectedDomains });

        auto reducedAll = runner.AddJob([&]() {
            LOG_INFO("calculate stats started");
            TReduceCmd<TCalculateAllReducer>(tx)
                .Input(TTable<TIntermediateQueries>(tx, intermediateTableName), IntermediateInputTag)
                .Output(TTable<TPreparedQueries>(tx, preparedTableName), PreparedQueriesOutputTag)
                .ReduceBy({"Domain", "NormalizedQuery", "Segment"})
                .Do();
        }, { sortedIntermediateQueriesFiltered2 });

        auto sortedPreparedQueries = runner.AddJob([&]() {
            TSortCmd<TPreparedQueries>(tx)
                .Input<TPreparedQueries>(preparedTableName)
                .Output<TPreparedQueries>(preparedTableName)
                .By({"TopCorrectedQuery"})
                .Do();
        }, { reducedAll });

        auto innerJoinedQueriesWithClusters = runner.AddJob([&]() {
            TReduceCmd<TInnerJoinQueriesWithClusters>(tx)
                .Input(TTable<TQueryToCluster>(tx, NYT::TRichYPath(cfg.TABLE_SEARCHSHARE_QUERY_TO_CLUSTER_HASHED).RenameColumns({{"QueryHash", "TopCorrectedQuery"}})), SearchshareQueryToClusterInputTag)
                // .Input(TTable<TQueryToCluster>(tx, NYT::TRichYPath(cfg.TABLE_SPIKES_QUERY_TO_CLUSTER_HASHED).RenameColumns({{"QueryHash", "TopCorrectedQuery"}})), SpikesQueryToClusterInputTag)
                .Input(TTable<TPreparedQueries>(tx, preparedTableName), PreparedQueriesInputTag)
                .Output(TTable<TPreparedQueries>(tx, preparedTableName), PreparedQueriesOutputTag)
                .ReduceBy({"TopCorrectedQuery"})
                .Do();
        }, { sortedPreparedQueries });

        // NEED TO DO GET CLUSTER NAMES FIRST!

        THashMap<int64_t, std::pair<TString, TVector<float>>> clusterInfo;

        auto readClusterInfo = runner.AddJob([&]() {
            for (auto reader = tx->CreateTableReader<TClusterInfo>(cfg.TABLE_SEARCHSHARE_CLUSTERS_FULL_INFO);
                    reader->IsValid();
                    reader->Next()) {
                const auto &row = reader->GetRow();
                const auto &clusterId = row.GetClusterId();
                clusterInfo[clusterId] = {row.GetClusterName(), {}};

                for (auto &x: row.GetCentroidDssm()) {
                    clusterInfo[clusterId].second.push_back(x);
                }
            }

            LOG_INFO("cnt of clusters: %lu", clusterInfo.size());
        });

        auto sortedPreparedQueriesBySearchshareCluster = runner.AddJob([&]() {
            TSortCmd<TPreparedQueries>(tx)
                .Input<TPreparedQueries>(preparedTableName)
                .Output<TPreparedQueries>(preparedTableName)
                .By({"SearchshareCluster"})
                .Do();
        }, { innerJoinedQueriesWithClusters });

        auto processedQueries = runner.AddJob([&]() {
            TMapCmd<TProcessQueriesMapper>(tx, new TProcessQueriesMapper(clusterInfo))
                .Input(TTable<TPreparedQueries>(tx, preparedTableName), PreparedQueriesInputTag)
                .Output(TTable<TPreparedQueriesWithClusterInfo>(tx, preparedTableName), PreparedQueriesWithClusterInfoOutputTag)
                .AddYtFile(cfg.DSSM_MODEL)
                .MemoryLimit(2_GBs)
                .Do();
            LOG_INFO("stats are calculated");
        }, { sortedPreparedQueriesBySearchshareCluster, readClusterInfo });

        TString rivalsTable = cfg.TABLE_RIVALS;

        auto sortedPreparedQueriesProcessed = runner.AddJob([&]() {
            TSortCmd<TPreparedQueriesWithClusterInfo>(tx)
                .Input<TPreparedQueriesWithClusterInfo>(preparedTableName)
                .Output<TPreparedQueriesWithClusterInfo>(preparedTableName)
                .By({"Domain"})
                .Do();
        }, { processedQueries });

        TString nicheTableName = CreateNicheTableNameByDate(cfg.TABLE_NICHE_CALCULATED, date);
        TString queriesOfDomainTableName = NYTUtils::JoinPath(cfg.TABLE_QUERIES_TMP, "supplement_to_3rd_segment");

        auto builtNiche = runner.AddJob([&]() {
            LOG_INFO("build niche: start");
            TReduceCmd<TBuildNicheReducer>(tx)
                .Input(TTable<TRivalsWithMetric>(tx, NYT::TRichYPath(rivalsTable)), RivalsWithMetricInputTag)
                .Input(TTable<TPreparedQueriesWithClusterInfo>(tx, NYT::TRichYPath(preparedTableName).RenameColumns({{"Domain", "Rival"}})), PreparedQueriesWithClusterInfoInputTag)
                .Output(TTable<TNicheHostQuery>(tx, nicheTableName), NicheHostQueryOutputTag)
                .ReduceBy({"Rival"})
                .Do();
        }, { sortedPreparedQueriesProcessed });

        auto sortedNiche = runner.AddJob([&]() {
            TSortCmd<TNicheHostQuery>(tx)
                .Input<TNicheHostQuery>(nicheTableName)
                .Output<TNicheHostQuery>(nicheTableName)
                .By({"Domain", "NormalizedQuery"})
                .Do();
        }, { builtNiche });

        auto sortedPreparedProcessedTable = runner.AddJob([&]() {
            TSortCmd<TPreparedQueriesWithClusterInfo>(tx)
                .Input<TPreparedQueriesWithClusterInfo>(preparedTableName)
                .Output<TPreparedQueriesWithClusterInfo>(preparedTableName)
                .By({"Domain", "NormalizedQuery"})
                .Do();
        }, { builtNiche });

        auto leftThirdSegment = runner.AddJob([&]() {
            TReduceCmd<TLeaveOnlyThirdSegmentReducer>(tx)
                .Input(TTable<TPreparedQueriesWithClusterInfo>(tx, NYT::TRichYPath(preparedTableName).Columns({"Domain", "NormalizedQuery"})), PreparedQueriesWithClusterInfoInputTag)
                .Input(TTable<TNicheHostQuery>(tx, nicheTableName), NicheHostQueryInputTag)
                .Output(TTable<TNicheHostQuery>(tx, nicheTableName), NicheHostQueryOutputTag)
                .Output(TTable<TNicheHostQuery>(tx, queriesOfDomainTableName), DomainsQueriesOutputTag)
                .ReduceBy({"Domain", "NormalizedQuery"})
                .Do();
        }, { sortedPreparedProcessedTable, sortedNiche });

        auto sortedThirdSegment = runner.AddJob([&]() {
            TSortCmd<TNicheHostQuery>(tx)
                .Input<TNicheHostQuery>(nicheTableName)
                .Output<TNicheHostQuery>(nicheTableName)
                .By({"Domain", "NormalizedQuery"})
                .Do();
        }, { leftThirdSegment });

        auto sortedSupplement = runner.AddJob([&]() {
            TSortCmd<TNicheHostQuery>(tx)
                .Input<TNicheHostQuery>(queriesOfDomainTableName)
                .Output<TNicheHostQuery>(queriesOfDomainTableName)
                .By({"InitDomain", "ClusterName"})
                .Do();
        }, { leftThirdSegment });

        auto processedThirdSegment = runner.AddJob([&]() {
            TReduceCmd<TProcessThirdSegmentReducer>(tx)
                .Input(TTable<TNicheHostQuery>(tx, nicheTableName))
                .Output(TTable<TNicheHostQuery>(tx, nicheTableName))
                .ReduceBy({"Domain", "NormalizedQuery"})
                .Do();
            LOG_INFO("build niche: finish");
        }, { sortedThirdSegment });

        auto sortedThirdSegmentReduced = runner.AddJob([&]() {
            LOG_INFO("sampling stage: start");
            TSortCmd<TNicheHostQuery>(tx)
                .Input<TNicheHostQuery>(nicheTableName)
                .Output<TNicheHostQuery>(nicheTableName)
                .By({"Domain", "ClusterName"})
                .Do();
        }, { processedThirdSegment });

        auto reducedQueriesClustered = runner.AddJob([&]() {
            TReduceCmd<TCountQueriesClusteredReducer>(tx)
                .Input(TTable<TNicheHostQuery>(tx, NYT::TRichYPath(nicheTableName).Columns({"Domain", "ClusterName"})))
                .Output(TTable<TNicheDomainClusterCount>(tx, nicheTableName + "-niche-cluster-count"))
                .ReduceBy({"Domain", "ClusterName"})
                .Do();
        }, { sortedThirdSegmentReduced });

        auto sortedCount = runner.AddJob([&]() {
            TSortCmd<TNicheDomainClusterCount>(tx)
                .Input<TNicheDomainClusterCount>(nicheTableName + "-niche-cluster-count")
                .Output<TNicheDomainClusterCount>(nicheTableName + "-niche-cluster-count")
                .By({"Domain", "ClusterName"})
                .Do();
        }, { reducedQueriesClustered });

        auto countQueriesReduced = runner.AddJob([&]() {
            TReduceCmd<TCountQueriesReducer>(tx)
                .Input(TTable<TNicheHostQuery>(tx, NYT::TRichYPath(nicheTableName).Columns({"Domain"})))
                .Output(TTable<TNicheDomainClusterCount>(tx, nicheTableName + "-niche-tmp"))
                .ReduceBy({"Domain"})
                .Do();
        }, { sortedThirdSegment });

        THashMap<int64_t, int64_t> domainToQueryCount;

        auto readNicheDomainClusterCount = runner.AddJob([&]() {
            for (auto reader = tx->CreateTableReader<TNicheDomainClusterCount>(nicheTableName + "-niche-tmp");
                reader->IsValid();
                reader->Next()) {
                    auto &row = reader->GetRow();
                    domainToQueryCount[row.GetDomain()] = row.GetCount();
            }

            LOG_INFO("cnt of domainToQueryCount: %lu", domainToQueryCount.size());
        }, { countQueriesReduced });

        THashSet<int64_t> hashedWebmasterHosts;
        THashSet<TString> webmasterHosts;

        auto readWebmasterHosts = runner.AddJob([&]() {
            if (!NYTUtils::LoadWebmastersHosts(tx, cfg.TABLE_WEBMASTER_HOSTS, webmasterHosts, 4000000)) {
                ythrow yexception() << "there is problem with webmaster hosts table";
            }

            for (auto &host: webmasterHosts) {
                hashedWebmasterHosts.insert(FnvHash<int64_t>(HostUrlToDomain(host)));
            }
        });

        auto reducedSampleNiche = runner.AddJob([&]() {
            TReduceCmd<TSampleNicheReducer>(tx, new TSampleNicheReducer(domainToQueryCount, hashedWebmasterHosts))
                .Input(TTable<TNicheDomainClusterCount>(tx, nicheTableName + "-niche-cluster-count"), NicheDomainClusterCountInputTag)
                .Input(TTable<TNicheHostQuery>(tx, NYT::TRichYPath(queriesOfDomainTableName).RenameColumns({{"Domain", "_"}, {"InitDomain", "Domain"}})), DomainsQueriesInputTag)
                .Input(TTable<TNicheHostQuery>(tx, nicheTableName), NicheHostQueryInputTag)
                .Output(TTable<TNicheHostQuerySampled0>(tx, nicheTableName), NicheHostQuerySampled0OutputTag)
                .AddYtFile(cfg.DSSM_MODEL)
                .MemoryLimit(2_GBs)
                .ReduceBy({"Domain", "ClusterName"})
                .Do();
            LOG_INFO("sampling stage: finish");
        }, { readNicheDomainClusterCount, readWebmasterHosts, sortedCount, countQueriesReduced, sortedSupplement });

        auto sortedNicheHostQuerySampled0 = runner.AddJob([&]() {
            LOG_INFO("sampling clusters stage: start");
            TSortCmd<TNicheHostQuerySampled0>(tx)
                .Input<TNicheHostQuerySampled0>(nicheTableName)
                .Output<TNicheHostQuerySampled0>(nicheTableName)
                .By({"Domain", "ClusterName"})
                .Do();
        }, { reducedSampleNiche });

        auto reducedCalculateDomainSize = runner.AddJob([&]() {
            TReduceCmd<TCalculateDomainSizeReducer>(tx)
                .Input(TTable<TNicheHostQuerySampled0>(tx, nicheTableName))
                .Output(TTable<TNicheDomainClusterCount>(tx, nicheTableName + "-sizes"))
                .ReduceBy({"Domain"})
                .Do();
        }, { sortedNicheHostQuerySampled0 });

        auto reducedCountDomainClusterCount = runner.AddJob([&]() {
            TReduceCmd<TCountDomainClusterCountReducer>(tx)
                .Input(TTable<TNicheHostQuerySampled0>(tx, nicheTableName))
                .Output(TTable<TNicheDomainClusterCount>(tx, nicheTableName + "-c-sizes"))
                .ReduceBy({"Domain", "ClusterName"})
                .Do();
        }, { sortedNicheHostQuerySampled0 });

        auto sortedNicheDomainClusterCount = runner.AddJob([&]() {
            TSortCmd<TNicheDomainClusterCount>(tx)
                .Input<TNicheDomainClusterCount>(nicheTableName + "-c-sizes")
                .Output<TNicheDomainClusterCount>(nicheTableName + "-c-sizes")
                .By({"Domain", "ClusterName"})
                .Do();
        }, { reducedCountDomainClusterCount });

        THashMap<int64_t, int64_t> domainToCount;

        auto readNicheDomainClusterCountSizes = runner.AddJob([&]() {
            for (auto reader = tx->CreateTableReader<TNicheDomainClusterCount>(nicheTableName + "-sizes");
                reader->IsValid();
                reader->Next()) {
                    auto &row = reader->GetRow();
                    domainToCount[row.GetDomain()] = row.GetCount();
            }
        }, { reducedCalculateDomainSize });

        auto reducedSampleClusterPercentile = runner.AddJob([&]() {
            TReduceCmd<TSampleClusterPercentileReducer>(tx, new TSampleClusterPercentileReducer(domainToCount))
                .Input(TTable<TNicheDomainClusterCount>(tx, nicheTableName + "-c-sizes"), NicheDomainClusterCountInputTag)
                .Input(TTable<TNicheHostQuerySampled0>(tx, nicheTableName), NicheHostQuerySampled0InputTag)
                .Output(TTable<TNicheHostQuerySampled0>(tx, nicheTableName), NicheHostQuerySampled0OutputTag)
                .ReduceBy({"Domain", "ClusterName"})
                .Do();
            LOG_INFO("sampling clusters stage: finish");
        }, { readNicheDomainClusterCountSizes, sortedNicheDomainClusterCount, reducedCountDomainClusterCount });

        // Decode stage: three consecutive sort + reduce passes over the niche
        // table, one per column (Domain, NormalizedQuery, TopCorrectedQuery).
        // Each pass sorts the niche table in place by the column, then reduces it
        // together with a TKeyHash table whose "Hash" column is renamed to that
        // column, so both inputs carry the reduce key under the same name.
        // The row type of the niche table advances with every pass:
        // Sampled0 -> Sampled1 -> Sampled2 -> Sampled.
        // Presumably each TDecode*Reducer replaces the hashed value with the
        // original string taken from the hash table; the reducer bodies are not
        // visible here, so confirm against their definitions.
        // Runs after the cluster sampling stage and after both hash tables have
        // been sorted by hash.
        auto finalStep = runner.AddJob([&]() {
            LOG_INFO("decode: start");
// Domain decode: joins with the host hash table (hostHash).
            TSortCmd<TNicheHostQuerySampled0>(tx)
                .Input<TNicheHostQuerySampled0>(nicheTableName)
                .Output<TNicheHostQuerySampled0>(nicheTableName)
                .By({"Domain"})
                .Do();
            TReduceCmd<TDecodeDomainReducer>(tx)
                .Input(TTable<TKeyHash>(tx, NYT::TRichYPath(hostHash).RenameColumns({{"Hash", "Domain"}})), KeyHashInputTag)
                .Input(TTable<TNicheHostQuerySampled0>(tx, nicheTableName), NicheHostQuerySampled0InputTag)
                .Output(TTable<TNicheHostQuerySampled1>(tx, nicheTableName), NicheHostQuerySampled1OutputTag)
                .ReduceBy({"Domain"})
                .Do();
// NormalizedQuery decode: joins with the query hash table (queryHash).
            TSortCmd<TNicheHostQuerySampled1>(tx)
                .Input<TNicheHostQuerySampled1>(nicheTableName)
                .Output<TNicheHostQuerySampled1>(nicheTableName)
                .By({"NormalizedQuery"})
                .Do();
            TReduceCmd<TDecodeNormalizedQueryReducer>(tx)
                .Input(TTable<TKeyHash>(tx, NYT::TRichYPath(queryHash).RenameColumns({{"Hash", "NormalizedQuery"}})), KeyHashInputTag)
                .Input(TTable<TNicheHostQuerySampled1>(tx, nicheTableName), NicheHostQuerySampled1InputTag)
                .Output(TTable<TNicheHostQuerySampled2>(tx, nicheTableName), NicheHostQuerySampled2OutputTag)
                .ReduceBy({"NormalizedQuery"})
                .Do();
// TopCorrectedQuery decode: joins with the same query hash table (queryHash).
            TSortCmd<TNicheHostQuerySampled2>(tx)
                .Input<TNicheHostQuerySampled2>(nicheTableName)
                .Output<TNicheHostQuerySampled2>(nicheTableName)
                .By({"TopCorrectedQuery"})
                .Do();
            TReduceCmd<TDecodeTopCorrectedQueryReducer>(tx)
                .Input(TTable<TKeyHash>(tx, NYT::TRichYPath(queryHash).RenameColumns({{"Hash", "TopCorrectedQuery"}})), KeyHashInputTag)
                .Input(TTable<TNicheHostQuerySampled2>(tx, nicheTableName), NicheHostQuerySampled2InputTag)
                .Output(TTable<TNicheHostQuerySampled>(tx, nicheTableName), NicheHostQuerySampledOutputTag)
                .ReduceBy({"TopCorrectedQuery"})
                .Do();
            LOG_INFO("decode: finish");
        }, { reducedSampleClusterPercentile, sortedHostHashByHash, sortedQueryHashByHash });

        // Last job of the graph (runs after the decode stage): sort the niche
        // table in place by Domain, then remove the auxiliary tables listed
        // below. Every removal goes through the transaction tx, the same one the
        // sort commands in this graph are given.
        runner.AddJob([&]() {
            TSortCmd<TNicheHostQuerySampled>(tx)
                .Input<TNicheHostQuerySampled>(nicheTableName)
                .Output<TNicheHostQuerySampled>(nicheTableName)
                .By({"Domain"})
                .Do();
            // Tables derived from the niche table name.
            tx->Remove(nicheTableName + "-sizes");
            tx->Remove(nicheTableName + "-niche-tmp");
            tx->Remove(nicheTableName + "-niche-cluster-count");
            tx->Remove(nicheTableName + "-c-sizes");
            // Tables whose paths are held in variables defined outside this view.
            tx->Remove(intermediateTableName);
            tx->Remove(queryHash);
            tx->Remove(hostHash);
            tx->Remove(queryToUIdTableName);
            tx->Remove(domainToQueryTableName);
            tx->Remove(preparedTableName);
            tx->Remove(queriesOfDomainTableName);
        }, { finalStep });

        runner.Run();
        tx->Commit();
    }
    return 0;
}

} //namespace NNiche2
} //namespace NWebmaster
