#include <util/digest/fnv.h>
#include <util/generic/hash_set.h>
#include <util/generic/size_literals.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/wmcutil/hostid.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>
#include <robot/library/yt/static/command.h>
#include <robot/jupiter/protos/export.pb.h>
#include <robot/jupiter/protos/acceptance.pb.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/processors/tools/IKS/utils/canonizer.h>
#include <wmconsole/version3/processors/tools/host2vec/utils/utils.h>
#include <wmconsole/version3/processors/user_sessions/library/regions_limiter.h>
#include <library/cpp/compute_graph/compute_graph.h>
#include <wmconsole/version3/processors/user_sessions/niche2/conf/config.h>
#include <wmconsole/version3/processors/user_sessions/niche2/preparation/tables.pb.h>
#include <robot/library/yt/static/tags.h>
#include <library/cpp/text_processing/tokenizer/tokenizer.h>
#include <wmconsole/version3/wmcutil/yt/misc.h>
#include <util/string/vector.h>
#include <wmconsole/version3/library/dssm/dssm_utils.h>
#include <wmconsole/version3/processors/user_sessions/library/utils.h>
#include <wmconsole/version3/processors/indexing/hostinfo/protos/hostinfo.pb.h>
#include <wmconsole/version3/processors/user_sessions/protos/user_sessions.pb.h>
#include <wmconsole/version3/wmcutil/yt/transfer_manager.h>

#include <wmconsole/version3/processors/user_sessions/niche/conf/config.h>

#include "task_prepare_report.h"

namespace NWebmaster {
namespace NNiche2 {
using namespace NJupiter;
using namespace NProto;

// Multiplier applied in TGetTopDomainsQueriesReducer to the per-domain user-session row count
// to decide how many user-session rows are kept.
static const float TOP_QUERIES_PERCENT = 0.9;
// Not referenced in the part of the file reviewed here.
static const int64_t RIVALS_IN_REPORT_UPPER_BOUND = 20;
// TFilterQueriesReducer drops a query whose summed UID count is below this value.
static const int64_t UNIQUE_UID_LOWER_BOUND = 5;
// Maximum number of URLs CodeSimilarUrls packs into one encoded string;
// also used by TLeaveTopUrlReducer to stop reading rows.
static const int64_t RIVAL_URLS_THRESHOLD = 20;
// Only used here to derive RIVALS_THRESHOLD.
static const int64_t RIVALS_REAL_THRESHOLD = 70;
// Maximum number of rows TLeaveOnlyTopRivalsReducer passes through per key.
// NOTE(review): the extra +1 presumably leaves room for the domain itself - confirm.
static const int64_t RIVALS_THRESHOLD = RIVALS_REAL_THRESHOLD + 1;
// Not referenced in the part of the file reviewed here.
static const int64_t QUERIES_PER_DOMAIN_THRESHOLD = 100;
// Base cap on user-session rows per key in TJoinGroupsAndShowsForNicheReducer
// (scaled there by GetPositionVisibility of the row's position).
static const int64_t SERP_QUERIES_THRESHOLD = 50000;
// Cap on PRS-log rows per key in TJoinGroupsAndShowsForNicheReducer.
static const int64_t PRS_LOG_QUERIES_THRESHOLD = 2000;
// Cap on rows emitted per key by TJoinGroupsAndShowsForNicheNewMarketsReducer.
static const int64_t NEW_MARKETS_QUERIES_THRESHOLD = 1000;
// Cap on the number of groups a single query is shared with in TJoinGroupsAndShowsReducer.
static const int64_t GROUP_ID_SHARE_COUNT = 700;
// Group ids are built as "<prefix>:<domain>".
static const TString NEW_MARKETS_GROUP_ID_PREFIX = "NEW_MARKETS";
static const TString NICHE_GROUP_ID_PREFIX = "NICHE";
// Not referenced in the part of the file reviewed here.
static const TString RADAR_TYPE_UNDEF = "undef";
static const TString RADAR_TYPE_ITSELF = "itself";
// Click share keyed by position index 0..50. GetPositionVisibility divides an entry by the
// entry for position 0 to obtain a relative visibility.
// NOTE(review): entry 50 is noticeably larger than its neighbours - presumably an aggregate
// bucket for all positions >= 50; confirm against the source of these numbers.
static const THashMap<int, double> POSITION_CLICK_SHARE = {
    {0, 0.44145802},
    {1, 0.17935543},
    {2, 0.0991559},
    {3, 0.06437186},
    {4, 0.04511521},
    {5, 0.033906512},
    {6, 0.026201464},
    {7, 0.02084791},
    {8, 0.017130397},
    {9, 0.0148711195},
    {10, 0.012429338},
    {11, 0.00802707},
    {12, 0.005058365},
    {13, 0.0037565376},
    {14, 0.00320557},
    {15, 0.002985383},
    {16, 0.0021870318},
    {17, 0.0015464955},
    {18, 0.0012920866},
    {19, 0.0012179656},
    {20, 0.0008981123},
    {21, 0.0008094015},
    {22, 0.0007817494},
    {23, 0.0007629992},
    {24, 0.00074897404},
    {25, 0.0006287237},
    {26, 0.00057357876},
    {27, 0.00054003723},
    {28, 0.0005263504},
    {29, 0.0005377767},
    {30, 0.00039226116},
    {31, 0.0003618701},
    {32, 0.00035197384},
    {33, 0.00034606113},
    {34, 0.00034189475},
    {35, 0.00029397398},
    {36, 0.00027331087},
    {37, 0.00025987063},
    {38, 0.0002549697},
    {39, 0.00026186308},
    {40, 0.00021976128},
    {41, 0.00020842045},
    {42, 0.00020256363},
    {43, 0.00019855474},
    {44, 0.00019574503},
    {45, 0.00018173702},
    {46, 0.0001716505},
    {47, 0.00016451556},
    {48, 0.00016322076},
    {49, 0.00016969365},
    {50, 0.004058719},
};

// Returns the click share of `position` relative to the click share of position 0.
// `position` must be a key of POSITION_CLICK_SHARE (0..50): this is asserted via Y_ASSERT,
// and the .at() lookup throws for a missing key.
static double GetPositionVisibility(int position) {
    Y_ASSERT(position >= 0 && position <= 50);
    const double positionShare = POSITION_CLICK_SHARE.at(position);
    const double topShare = POSITION_CLICK_SHARE.at(0);
    return positionShare / topShare;
}

// Typed input tags used with TTagedReader::GetRows() to select rows of a given input.
// Tags that share a row type (e.g. QueryInfoInputTag / TmpQueryInfoInputTag / Tmp2QueryInfoInputTag)
// let one job read several inputs of that type separately.
// Note: id 22 is not used in this list.
static const TInputTag<NUserSessions::NProto::TQuery> UserSessionInputTag(1);
static const TInputTag<NUserSessions::NProto::TQuery> PrsInputTag(2);
static const TInputTag<TQueryInfo> QueryInfoInputTag(3);
static const TInputTag<TQueryInfo> TmpQueryInfoInputTag(4);
static const TInputTag<TQueryInfo> Tmp2QueryInfoInputTag(5);
static const TInputTag<TGroupIdToQuery> GroupIdToQueryInputTag(6);
static const TInputTag<TWordstat> WordstatInputTag(7);
static const TInputTag<TRivalByVisibilityPart> RivalByVisibilityPartInputTag(8);
static const TInputTag<TNicheHostQuerySampled> NicheHostQuerySampledInputTag(9);
static const TInputTag<TRivalByVisibilityPart> TmpRivalByVisibilityPartInputTag(10);
static const TInputTag<TQueryPart> TmpQueryPartInputTag(11);
static const TInputTag<TQueryPart> QueryPartInputTag(12);
static const TInputTag<NJupiter::TContentAttrsForWebmaster> ContentAttrsInputTag(13);
static const TInputTag<NJupiter::TAcceptanceUrlForWebMasterRecord> UrlForWebmasterRecordInputTag(14);
static const TInputTag<THostContentAttrStatistics> TitleHostContentAttrStatisticsInputTag(15);
static const TInputTag<THostContentAttrStatistics> DescrHostContentAttrStatisticsInputTag(16);
static const TInputTag<TQueryPartWWeight> QueryPartWWeightInputTag(17);
static const TInputTag<TQueryToUrlWPos> QueryToUrlWPosInputTag(18);
static const TInputTag<TQueryToUrl> QueryToUrlInputTag(19);
static const TInputTag<TQueryToUID> QueryToUIDInputTag(20);
static const TInputTag<TQueryToUIDCount> QueryToUIDCountInputTag(21);
static const TInputTag<TQueryInfoWithCount> QueryInfoWithCountInputTag(23);
static const TInputTag<TQueryVisibility> QueryVisibilityInputTag(24);
static const TInputTag<TDomainRegionShows> DomainRegionShowsInputTag(25);
static const TInputTag<TDomainRegionShows> TmpDomainRegionShowsInputTag(26);
static const TInputTag<TRivalVisibility> RivalVisibilityInputTag(27);

// Typed output tags passed to TTagedWriter::AddRow() to select the destination output.
// NOTE(review): QueryInfoWithCountOutputTag and QueryVisibilityOutputTag both use id 18 while
// ids 5 and 17 are unused. This is harmless only if the two tags are never bound within the
// same operation - confirm, or give one of them a distinct id.
static const TOutputTag<TQueryInfo> QueryInfoOutputTag(1);
static const TOutputTag<TQueryInfo> CopyQueryInfoOutputTag(2);
static const TOutputTag<TRivalByVisibilityPart> RivalByVisibilityPartOutputTag(3);
static const TOutputTag<TRivalByVisibilityPart> TmpRivalByVisibilityPartOutputTag(4);
static const TOutputTag<TWordstat> WordstatOutputTag(6);
static const TOutputTag<TQueryPart> QueryPartOutputTag(7);
static const TOutputTag<TQueryPart> TmpQueryPartOutputTag(8);
static const TOutputTag<TGroupIdToQuery> GroupIdToQueryOutputTag(9);
static const TOutputTag<TQueryPartWWeight> QueryPartWWeightOutputTag(10);
static const TOutputTag<TNicheHostQuerySampled> NicheHostQuerySampledOutputTag(11);
static const TOutputTag<TQueryToUrlWPos> QueryToUrlWPosOutputTag(12);
static const TOutputTag<TQueryToUrl> QueryToUrlOutputTag(13);
static const TOutputTag<TQueryToUID> QueryToUIDOutputTag(14);
static const TOutputTag<TQueryToUIDCount> QueryToUIDCountOutputTag(15);
static const TOutputTag<TRivalReportRow> RivalReportRowOutputTag(16);
static const TOutputTag<TQueryInfoWithCount> QueryInfoWithCountOutputTag(18);
static const TOutputTag<TQueryVisibility> QueryVisibilityOutputTag(18);
static const TOutputTag<TDomainRegionShows> DomainRegionShowsOutputTag(19);
static const TOutputTag<TRivalVisibility> RivalVisibilityOutputTag(20);

// Debugging hook for input paths: currently returns `path` unchanged.
// Uncommenting the line below restricts the path to its first 100000 rows.
static NYT::TRichYPath DebugPath(NYT::TRichYPath path) {
    // path.AddRange(NYT::TReadRange().FromRowIndices(0, 100000));
    return path;
}

// Convenience overload: wraps a table name into a TRichYPath and applies the same debugging hook.
static NYT::TRichYPath DebugPath(const TString& table) {
    return DebugPath(NYT::TRichYPath(table));
}

// Reduces a host URL to a bare domain by stripping, in this order,
// the scheme prefix, the "m" prefix and the "www" prefix.
static TString HostUrlToDomain(TString hostUrl) {
    const auto withoutScheme = CutSchemePrefix(hostUrl);
    const auto withoutMobilePrefix = CutMPrefix(withoutScheme);
    const auto withoutWww = CutWWWPrefix(withoutMobilePrefix);
    return TString(withoutWww);
}

// Serializes at most RIVAL_URLS_THRESHOLD URLs from `urls` into one string,
// each URL followed by a '>' separator. Iteration follows the hash set's own order.
static TString CodeSimilarUrls(const THashSet<TString>& urls) {
    TString encoded;
    int taken = 0;
    for (const auto& url : urls) {
        if (taken >= RIVAL_URLS_THRESHOLD) {
            break;
        }
        encoded.append(url);
        encoded.append(">");
        ++taken;
    }
    return encoded;
}

// Inverse of CodeSimilarUrls: splits `pattern` on '>' (empty pieces are skipped) and keeps
// only the pieces that start with "http://" or "https://".
static TDeque<TString> ParseSimilarUrls(const TString& pattern) {
    TDeque<TString> candidates = StringSplitter(pattern).SplitBySet(">").SkipEmpty();
    TDeque<TString> urls;
    for (const auto& candidate : candidates) {
        const bool hasHttpScheme = candidate.StartsWith("http://") || candidate.StartsWith("https://");
        if (!hasHttpScheme) {
            continue;
        }
        urls.push_back(candidate);
    }
    return urls;
}

struct TGetPornQueriesMapper: public TTaggedMapper {
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        while (reader.IsValid()) {
            DoByTag(reader, writer, UserSessionInputTag);
            DoByTag(reader, writer, PrsInputTag);
        }
    }
private:
    void DoByTag(TTagedReader reader, TTagedWriter writer, TInputTag<NUserSessions::NProto::TQuery> inputTag) {
        TQueryInfo dstMsg;
        for (const auto &row: reader.GetRows(inputTag)) {
            dstMsg.SetQuery(row.GetCorrectedQuery());
            if (IsCp(row)) {
                dstMsg.SetIsMobile(true);
                writer.AddRow(dstMsg, QueryInfoOutputTag);
            }
            if (IsPorn(row)) {
                dstMsg.SetIsMobile(false);
                writer.AddRow(dstMsg, QueryInfoOutputTag);
            }
        }
    }

    bool IsCp(const NUserSessions::NProto::TQuery &row) const {
        return row.GetQueryIsCPPred() >= 0.7;
    }

    bool IsPorn(const NUserSessions::NProto::TQuery &row) const {
        return row.GetUpperPornoUpperPl() == 100;
    }
};
REGISTER_MAPPER(TGetPornQueriesMapper)

struct TRoundUpTheRegionQueriesMapper: public TTaggedMapper {
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        while (reader.IsValid()) {
            DoByTag(reader, writer, UserSessionInputTag, true);
            DoByTag(reader, writer, PrsInputTag, false);
        }
    }
private:
    bool GoodHost(const TString &host) {
        return host.find('%') == TString::npos
            && host.find('.') != TString::npos
            && (host.length() <= 64 || host.StartsWith("https://xn--") || host.StartsWith("http://xn--"))
            && !host.Contains("entity_search_doc");
    }

    bool IsWizard(const TString &url) {
        auto withoutScheme = CutSchemePrefix(url);
        if (withoutScheme.StartsWith("yandex.") && (
            withoutScheme.EndsWith("/imageswizard") ||
            withoutScheme.EndsWith("/video/videoblend")
            )) {
            return true;
        }
        if (withoutScheme.StartsWith("maps.yandex.") &&
            withoutScheme.EndsWith("/geo_wizard")) {
            return true;
        }
        return false;
    }


    bool InvisibleUrl(const TString &host, const TString &path) {
        static THashSet<TString> cutUrls{ "yandex.ru/direct_showcase",
                                       "yandex.ru/fake_metadoc",
                                       "passport.yandex.ru" };
        if (host.EndsWith("yandex.net")) {
            return true;
        }
        auto url = CutSchemePrefix(host + path);
        if (cutUrls.contains(url)) {
            return true;
        }
        return false;
    }

    void DoByTag(TTagedReader reader, TTagedWriter writer, TInputTag<NUserSessions::NProto::TQuery> inputTag, bool currentIsUserSession) {
        TQueryInfo dstMsg;
        TQueryToUID q2uid;
        for (auto &row: reader.GetRows(inputTag)) {
            // выкидываем не utf запросы [ошибки парсера]
            if (!IsUtf(row.GetCorrectedQuery()) || !IsUtf(row.GetQuery()) || !IsUtf(row.GetHost() + row.GetPath())) {
                continue;
            }
            // выкидываем мусорные домены
            if (!GoodHost(row.GetHost())) {
                continue;
            }
            // выкидываем навигационные запросы
            if (row.GetUpperQueryNavPred() > 0.5) {
                continue;
            }
            // выкидываем не органику
            if (!NUserSessions::IsVisibleQueryInWebmaster(row) && currentIsUserSession) {
                continue;
            }
            if (!NUserSessions::IsVisiblePrsLogQuery(row) && !currentIsUserSession) {
                continue;
            }
            // продолжаем выкидывать не органику -- выкидываем технические яндексовые урлы
            if (InvisibleUrl(row.GetHost(), row.GetPath())) {
                continue;
            }
            // выкидываем рекламу
            if (CutSchemePrefix(row.GetHost()).StartsWith("yabs.yandex.")) {
                continue;
            }
            // срезаем доминирование PRS-логов в запросах
            if (!currentIsUserSession && RandomNumber<float>() <= 0.6) {
                continue;
            }
            // убираем визардов
            if (IsWizard(row.GetHost() + row.GetPath())) {
                continue;
            }
            dstMsg.SetDomain(HostUrlToDomain(row.GetHost()));
            dstMsg.SetUrl(row.GetHost() + row.GetPath());
            dstMsg.SetRegionId(RoundUpRegion(row.GetRegionId()));
            dstMsg.SetQuery(row.GetCorrectedQuery());
            dstMsg.SetSegment(currentIsUserSession? E_USER_SESSION : E_PRS_LOGS);
            dstMsg.SetIsMobile(row.GetIsMobile() || row.GetIsPad());
            if (currentIsUserSession) {
                dstMsg.SetPosition(Min<double>(row.GetPosition(), 49.0f));
                dstMsg.SetClicks(row.GetClicks());
                dstMsg.SetShows(row.GetShows());
                // to filter out rare queries
                q2uid.SetQuery(dstMsg.GetQuery());
                q2uid.SetUID(row.GetUID());
            } else {
                dstMsg.SetPosition(-1);
                dstMsg.SetClicks(0);
                dstMsg.SetShows(0);
            }
            writer.AddRow(dstMsg, QueryInfoOutputTag);
            writer.AddRow(q2uid, QueryToUIDOutputTag);
        }
    }


    int64_t RoundUpRegion(int64_t regionId) {
        int64_t parentId = regionId;
        try {
            const static NGeobase::TLookup geo(TConfig::GEOBASE_FILE_LITE);
            parentId = geo.GetParentIdWithType(regionId, static_cast<int>(ROUND_LEVEL));
            if (parentId == 0) {
                parentId = geo.GetParentId(regionId);
            }
        } catch(std::runtime_error &e) {
        }
        return parentId;
    }

    const NGeobase::ERegionType ROUND_LEVEL = NGeobase::ERegionType::REGION;
};
REGISTER_MAPPER(TRoundUpTheRegionQueriesMapper)

// "Domain", "Url", "RegionId", "Query", "IsMobile"
// Reducer: collapses all rows of one key into a single TQueryInfo row.
// The output row copies the last input row, with Clicks and Shows replaced by their sums and
// Position replaced by the average over the rows that actually have a position (>= 0).
// Rows with a negative position do not take part in the average; if no row has a position
// the output position is -1. Nothing is written when Domain, Query or Url is empty.
struct TCalculateAverageReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TQueryInfo dstMsg;
        int64_t totalCount = 0;
        int64_t positionedCount = 0;
        double sumPosition = 0;
        int64_t sumClicks = 0;
        int64_t sumShows = 0;
        while (reader.IsValid()) {
            for (const auto &row: reader.GetRows(QueryInfoInputTag)) {
                totalCount += 1;
                dstMsg = row;
                if (row.GetPosition() >= 0) {
                    // only rows with a real position contribute to the average
                    positionedCount += 1;
                    sumPosition += row.GetPosition();
                }
                sumClicks += row.GetClicks();
                sumShows += row.GetShows();
            }
        }
        if (totalCount > 0 && dstMsg.GetDomain() != "" && dstMsg.GetQuery() != "" && dstMsg.GetUrl() != "") {
            if (positionedCount > 0) {
                // divide by the number of positioned rows, not by the total row count
                dstMsg.SetPosition(sumPosition / positionedCount);
            } else {
                dstMsg.SetPosition(-1);
            }
            dstMsg.SetClicks(sumClicks);
            dstMsg.SetShows(sumShows);
            writer.AddRow(dstMsg, QueryInfoOutputTag);
        }
    }
};
REGISTER_REDUCER(TCalculateAverageReducer)


// "Query", "Url", "RegionId"
struct TAveragePositionReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TQueryToUrlWPos dstMsg;
        int64_t totalClicks = 0;
        for (const auto &row: reader.GetRows(QueryToUrlWPosInputTag)) {
            dstMsg.SetUrl(row.GetUrl());
            dstMsg.SetQuery(row.GetQuery());
            dstMsg.SetRegionId(row.GetRegionId());
            totalClicks += row.GetClicks();
        }
        dstMsg.SetClicks(-totalClicks);
        writer.AddRow(dstMsg, QueryToUrlWPosOutputTag);
    }
};
REGISTER_REDUCER(TAveragePositionReducer)

// "Query", "UID"
// Reducer: deduplicates (Query, UID) rows by forwarding only the first row of each key.
struct TLeaveUniqueReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        for (const auto& firstRow : reader.GetRows(QueryToUIDInputTag)) {
            writer.AddRow(firstRow, QueryToUIDOutputTag);
            // the remaining rows of the key are duplicates
            break;
        }
    }
};
REGISTER_REDUCER(TLeaveUniqueReducer)

// "Query"
struct TGetDomainsUIDCountReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        int64_t cnt = 0;
        TQueryToUIDCount dstMsg;
        for (const auto &row: reader.GetRows(QueryToUIDInputTag)) {
            dstMsg.SetQuery(row.GetQuery());
            cnt += 1;
        }
        dstMsg.SetUIDCount(cnt);
        writer.AddRow(dstMsg, QueryToUIDCountOutputTag);
    }
};
REGISTER_REDUCER(TGetDomainsUIDCountReducer)

// "Query"
// Reducer: passes the TQueryInfo rows of a key through only when the UID counts read for
// that key sum up to at least UNIQUE_UID_LOWER_BOUND. The UID-count rows are read first,
// the TQueryInfo rows afterwards.
struct TFilterQueriesReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        int64_t uidTotal = 0;
        for (const auto& counter : reader.GetRows(QueryToUIDCountInputTag)) {
            uidTotal += counter.GetUIDCount();
        }
        const bool frequentEnough = uidTotal >= UNIQUE_UID_LOWER_BOUND;
        if (frequentEnough) {
            for (const auto& info : reader.GetRows(QueryInfoInputTag)) {
                writer.AddRow(info, QueryInfoOutputTag);
            }
        }
    }
};
REGISTER_REDUCER(TFilterQueriesReducer)

// "Query"
// Reducer: collects the URLs of the first RIVAL_URLS_THRESHOLD rows of a key and emits one
// TQueryToUrl row with those URLs packed by CodeSimilarUrls. Query and RegionId are taken
// from the last row that was read.
// Reading stops after exactly RIVAL_URLS_THRESHOLD rows: CodeSimilarUrls encodes at most that
// many URLs, so reading one more row would let it drop an arbitrary URL of the set.
// NOTE(review): the URLs go through a hash set, so their order inside the encoded string is
// unspecified.
struct TLeaveTopUrlReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TQueryToUrl dstMsg;
        THashSet<TString> urls;
        int64_t total = 0;
        for (const auto &row: reader.GetRows(QueryToUrlWPosInputTag)) {
            dstMsg.SetQuery(row.GetQuery());
            dstMsg.SetRegionId(row.GetRegionId());
            urls.insert(row.GetUrl());
            if (++total >= RIVAL_URLS_THRESHOLD) {
                break;
            }
        }
        dstMsg.SetUrls(CodeSimilarUrls(urls));
        writer.AddRow(dstMsg, QueryToUrlOutputTag);
    }
};
REGISTER_REDUCER(TLeaveTopUrlReducer)

struct TCollectAllQueriesMapper: public TTaggedMapper {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TWordstat dstMsg;
        for (auto &row: reader.GetRows(QueryInfoInputTag)) {
            dstMsg.SetQuery(row.GetQuery());
            dstMsg.SetQueryShows(row.GetShows());
            writer.AddRow(dstMsg, WordstatOutputTag);
        }
        for (auto &row: reader.GetRows(GroupIdToQueryInputTag)) {
            dstMsg.SetQuery(row.GetQuery());
            dstMsg.SetQueryShows(0);
            writer.AddRow(dstMsg, WordstatOutputTag);
        }
    }
};
REGISTER_MAPPER(TCollectAllQueriesMapper)

// "Query"
struct TQueryShowsAddReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TWordstat dstMsg;
        int64_t queryShows = 0;
        for (auto &row: reader.GetRows(WordstatInputTag)) {
            queryShows += row.GetQueryShows();
            dstMsg.SetQuery(row.GetQuery());
        }
        dstMsg.SetQueryShows(queryShows);
        writer.AddRow(dstMsg, WordstatOutputTag);
    }
};
REGISTER_REDUCER(TQueryShowsAddReducer)

// "Domain", "RegionId"
struct TSumUpShowsReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TDomainRegionShows dstMsg;
        int64_t shows = 0;
        for (auto &row: reader.GetRows(QueryInfoInputTag)) {
            dstMsg.SetDomain(row.GetDomain());
            dstMsg.SetRegionId(row.GetRegionId());
            shows += row.GetShows();
        }
        dstMsg.SetShows(shows);
        writer.AddRow(dstMsg, DomainRegionShowsOutputTag);
    }
};
REGISTER_REDUCER(TSumUpShowsReducer)

// "Domain", "RegionId"
// Reducer: first sums Shows over the rows of TmpDomainRegionShowsInputTag, then re-emits
// every row of DomainRegionShowsInputTag with Weight = row shows / that sum.
// The weight is 0 when the sum is 0.
struct TCalculateWeightReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        int64_t showsTotal = 0;
        for (const auto& part : reader.GetRows(TmpDomainRegionShowsInputTag)) {
            showsTotal += part.GetShows();
        }
        for (auto weighted : reader.GetRows(DomainRegionShowsInputTag)) {
            const float share = (showsTotal != 0)
                ? static_cast<float>(weighted.GetShows()) / showsTotal
                : 0;
            weighted.SetWeight(share);
            writer.AddRow(weighted, DomainRegionShowsOutputTag);
        }
    }
};
REGISTER_REDUCER(TCalculateWeightReducer)

// "Domain", "RegionId"
// Reducer: copies the Weight of the last TDomainRegionShows row of a key (0 if there is
// none) onto every TRivalByVisibilityPart row of that key.
struct TAddRegionWeightReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        float regionWeight = 0;
        for (const auto& shows : reader.GetRows(DomainRegionShowsInputTag)) {
            regionWeight = shows.GetWeight();
        }
        for (auto rival : reader.GetRows(RivalByVisibilityPartInputTag)) {
            rival.SetWeight(regionWeight);
            writer.AddRow(rival, RivalByVisibilityPartOutputTag);
        }
    }
};
REGISTER_REDUCER(TAddRegionWeightReducer)

// "Domain", "RegionId", "GroupId", "Rival", "RivalType"
struct TGetRidOfIsMobileReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TRivalVisibility dstMsg;
        double visibility = 0;
        int64_t cnt = 0;
        for (auto &row: reader.GetRows(RivalByVisibilityPartInputTag)) {
            dstMsg.SetDomain(row.GetDomain());
            dstMsg.SetRival(row.GetRival());
            dstMsg.SetRegionId(row.GetRegionId());
            dstMsg.SetGroupId(row.GetGroupId());
            dstMsg.SetRivalType(row.GetRivalType());
            visibility += GetVisibility(row);
            cnt += 1;
        }
        if (cnt > 0) {
            visibility /= cnt;
        }
        dstMsg.SetVisibility(visibility);
        writer.AddRow(dstMsg, RivalVisibilityOutputTag);
    }
private:
    double GetVisibility(const TRivalByVisibilityPart &row) {
        if (row.GetGroupPopularity() == 0) {
            return 0;
        }
        return row.GetRivalPopularity() / row.GetGroupPopularity() * row.GetWeight();
    }
};
REGISTER_REDUCER(TGetRidOfIsMobileReducer)

struct TCollectAllGroupsMapper: public TTaggedMapper {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TGroupIdToQuery dstMsg;
        for (auto &row: reader.GetRows(GroupIdToQueryInputTag)) {
            dstMsg.SetQuery(row.GetQuery());
            dstMsg.SetGroupId(row.GetGroupId() + ":" + row.GetDomain());
            writer.AddRow(dstMsg, GroupIdToQueryOutputTag);
        }
    }
};
REGISTER_MAPPER(TCollectAllGroupsMapper)

// Mapper: re-emits every TQueryInfoWithCount row with the sign of Shows flipped.
struct TNegateShowsMapper: public TTaggedMapper {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        for (auto info : reader.GetRows(QueryInfoWithCountInputTag)) {
            const auto negatedShows = -info.GetShows();
            info.SetShows(negatedShows);
            writer.AddRow(info, QueryInfoWithCountOutputTag);
        }
    }
};
REGISTER_MAPPER(TNegateShowsMapper)

// "Query", "GroupId"
// Reducer: deduplicates TGroupIdToQuery rows by forwarding only the first row of each key.
struct TMakeUniqueQueryGroupReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        for (const auto& firstRow : reader.GetRows(GroupIdToQueryInputTag)) {
            writer.AddRow(firstRow, GroupIdToQueryOutputTag);
            // the remaining rows of the key are duplicates
            break;
        }
    }
};
REGISTER_REDUCER(TMakeUniqueQueryGroupReducer)

// "Query"
struct TJoinGroupsAndShowsReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        THashSet<TString> groupIds;
        for (auto &row: reader.GetRows(GroupIdToQueryInputTag)) {
            groupIds.insert(row.GetGroupId());
        }
        int cnt = 0;
        int64_t queryShows = 0;
        for (auto &row: reader.GetRows(WordstatInputTag)) {
            cnt += 1;
            queryShows += row.GetQueryShows();
        }
        for (auto row: reader.GetRows(QueryInfoInputTag)) {
            int newMarketsGroupsCount = 0;
            for (auto &groupId: groupIds) {
                row.SetGroupId(groupId);
                row.SetQueryShows(queryShows);
                if (groupId.StartsWith(NEW_MARKETS_GROUP_ID_PREFIX + ":")) {
                    newMarketsGroupsCount++;
                    if (newMarketsGroupsCount > GROUP_ID_SHARE_COUNT) {
                        continue;
                    }
                }
                writer.AddRow(row, QueryInfoOutputTag);
            }
        }
        groupIds.clear();
        THashMap<TString, int64_t> groupAndPos;
        for (auto &row: reader.GetRows(TmpQueryInfoInputTag)) {
            if (row.GetSegment() == E_USER_SESSION && row.GetPosition() >= 0) {
                if (groupAndPos.contains(row.GetGroupId())) {
                    groupAndPos[row.GetGroupId()] = Min(row.GetPosition(), groupAndPos[row.GetGroupId()]);
                } else {
                    groupAndPos[row.GetGroupId()] = row.GetPosition();
                }
                if (groupAndPos.size() > GROUP_ID_SHARE_COUNT) {
                    int64_t maxPosition = 0;
                    auto it = groupAndPos.begin();
                    for (auto i = groupAndPos.begin(); i != groupAndPos.end(); ++i) {
                        if (i->second > maxPosition) {
                            maxPosition = i->second;
                            it = i;
                        }
                    }
                    groupAndPos.erase(it);
                }
            }
        }
        for (auto row: reader.GetRows(Tmp2QueryInfoInputTag)) {
            row.SetQueryShows(queryShows);
            writer.AddRow(row, QueryInfoOutputTag);
            for (auto &[groupId, _]: groupAndPos) {
                row.SetGroupId(groupId);
                writer.AddRow(row, QueryInfoOutputTag);
            }
        }
    }
};
REGISTER_REDUCER(TJoinGroupsAndShowsReducer)

// "Domain"
// Reducer: tags the TQueryInfo rows of one key with the group id "NICHE:<Domain>" while
// limiting how many rows pass per segment.
//  - User-session rows: a row is dropped once the number of user-session rows seen before it
//    exceeds GetPositionVisibility(position) * SERP_QUERIES_THRESHOLD.
//  - PRS-log rows: a row is dropped once the number of PRS-log rows seen before it exceeds
//    PRS_LOG_QUERIES_THRESHOLD.
// Both counters advance for dropped rows as well.
struct TJoinGroupsAndShowsForNicheReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        int64_t serpSeen = 0;
        int64_t prsSeen = 0;
        for (auto info : reader.GetRows(QueryInfoInputTag)) {
            const auto segment = info.GetSegment();
            if (segment == E_USER_SESSION) {
                const auto position = info.GetPosition();
                const int64_t seenBefore = serpSeen++;
                if (seenBefore > GetPositionVisibility(position) * SERP_QUERIES_THRESHOLD) {
                    continue;
                }
            } else if (segment == E_PRS_LOGS) {
                const int64_t seenBefore = prsSeen++;
                if (seenBefore > PRS_LOG_QUERIES_THRESHOLD) {
                    continue;
                }
            }
            info.SetGroupId(NICHE_GROUP_ID_PREFIX + ":" + info.GetDomain());
            writer.AddRow(info, QueryInfoOutputTag);
        }
    }
};
REGISTER_REDUCER(TJoinGroupsAndShowsForNicheReducer)

// "Domain"
struct TGetDomainToCountReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        int64_t count = 0;
        for (auto &row: reader.GetRows(QueryInfoInputTag)) {
            if (row.GetSegment() != E_USER_SESSION) {
                continue;
            }
            count += 1;
        }
        TQueryInfoWithCount dstMsg;
        for (auto &row: reader.GetRows(TmpQueryInfoInputTag)) {
            if (row.GetSegment() != E_USER_SESSION) {
                dstMsg.SetCount(0);
            } else {
                dstMsg.SetCount(count);
            }
            dstMsg.SetDomain(row.GetDomain());
            dstMsg.SetUrl(row.GetUrl());
            dstMsg.SetRegionId(row.GetRegionId());
            dstMsg.SetPosition(row.GetPosition());
            dstMsg.SetClicks(row.GetClicks());
            dstMsg.SetShows(row.GetShows());
            dstMsg.SetQuery(row.GetQuery());
            dstMsg.SetSegment(row.GetSegment());
            dstMsg.SetIsMobile(row.GetIsMobile());
            dstMsg.SetGroupId(row.GetGroupId());
            dstMsg.SetQueryShows(row.GetQueryShows());
            writer.AddRow(dstMsg, QueryInfoWithCountOutputTag);
        }
    }
};
REGISTER_REDUCER(TGetDomainToCountReducer)

// "Domain"
// Reducer: converts TQueryInfoWithCount rows back to TQueryInfo rows.
// Rows of any segment other than user sessions always pass. Of the user-session rows only
// the leading ones pass: the limit is TOP_QUERIES_PERCENT * Count, taken from the first
// user-session row, and a row passes while the number of user-session rows seen before it
// does not exceed that limit.
// NOTE(review): "leading" depends on the input order - presumably rows are sorted by the
// negated Shows; confirm against the preceding sort.
struct TGetTopDomainsQueriesReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        int64_t keepLimit = 0;
        bool limitKnown = false;
        int64_t userSessionSeen = 0;
        for (const auto& counted : reader.GetRows(QueryInfoWithCountInputTag)) {
            if (counted.GetSegment() == E_USER_SESSION) {
                if (!limitKnown) {
                    keepLimit = TOP_QUERIES_PERCENT * counted.GetCount();
                    limitKnown = true;
                }
                const int64_t seenBefore = userSessionSeen++;
                if (seenBefore > keepLimit) {
                    continue;
                }
            }
            writer.AddRow(GetRidOfCount(counted), QueryInfoOutputTag);
        }
    }
private:
    // Copies all fields except Count into a TQueryInfo; a negative Shows value is turned
    // back into its absolute value.
    TQueryInfo GetRidOfCount(const TQueryInfoWithCount& counted) {
        TQueryInfo plain;
        plain.SetDomain(counted.GetDomain());
        plain.SetUrl(counted.GetUrl());
        plain.SetRegionId(counted.GetRegionId());
        plain.SetPosition(counted.GetPosition());
        plain.SetClicks(counted.GetClicks());
        auto shows = counted.GetShows();
        if (shows < 0) {
            shows = -shows;
        }
        plain.SetShows(shows);
        plain.SetQuery(counted.GetQuery());
        plain.SetSegment(counted.GetSegment());
        plain.SetIsMobile(counted.GetIsMobile());
        plain.SetGroupId(counted.GetGroupId());
        plain.SetQueryShows(counted.GetQueryShows());
        return plain;
    }
};
REGISTER_REDUCER(TGetTopDomainsQueriesReducer)

// "Domain"
struct TJoinGroupsAndShowsForNicheNewMarketsReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        int64_t cnt = 0;
        TGroupIdToQuery dstMsg;
        for (auto row: reader.GetRows(NicheHostQuerySampledInputTag)) {
            cnt++;
            if (cnt > NEW_MARKETS_QUERIES_THRESHOLD) {
                return;
            }
            dstMsg.SetQuery(row.GetTopCorrectedQuery());
            dstMsg.SetGroupId(NEW_MARKETS_GROUP_ID_PREFIX + ":" + row.GetDomain());
            writer.AddRow(dstMsg, GroupIdToQueryOutputTag);
        }
    }
};
REGISTER_REDUCER(TJoinGroupsAndShowsForNicheNewMarketsReducer)

// "Domain", "RegionId", "GroupId", "IsMobile", "Query", "Position"
struct TCalculateVisibilityPositionShowsReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TQueryVisibility dstMsg;
        double vPositionShows = 0;
        int64_t shows = 0;
        for (auto &row: reader.GetRows(QueryInfoInputTag)) {
            dstMsg.SetDomain(row.GetDomain());
            dstMsg.SetGroupId(row.GetGroupId());
            dstMsg.SetIsMobile(row.GetIsMobile());
            dstMsg.SetRegionId(row.GetRegionId());
            dstMsg.SetQuery(row.GetQuery());
            dstMsg.SetPosition(row.GetPosition());
            if (row.GetPosition() >= 0) {
                vPositionShows += GetPositionVisibility(row.GetPosition()) * row.GetShows();
                shows += row.GetShows();
            }
        }
        dstMsg.SetShows(shows);
        dstMsg.SetVPositionShows(vPositionShows);
        writer.AddRow(dstMsg, QueryVisibilityOutputTag);
    }
};
REGISTER_REDUCER(TCalculateVisibilityPositionShowsReducer)

// "Domain", "RegionId", "GroupId", "IsMobile", "Query"
struct TCalculateQueryVisibilityReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TQueryVisibility dstMsg;
        double vPositionShows = 0;
        int64_t shows = 0;
        for (auto &row: reader.GetRows(QueryVisibilityInputTag)) {
            dstMsg.SetDomain(row.GetDomain());
            dstMsg.SetGroupId(row.GetGroupId());
            dstMsg.SetIsMobile(row.GetIsMobile());
            dstMsg.SetRegionId(row.GetRegionId());
            dstMsg.SetQuery(row.GetQuery());
            vPositionShows += row.GetVPositionShows();
            shows += row.GetShows();
        }
        double visibility = 0;
        if (shows > 0) {
            visibility = vPositionShows / shows;
        }
        dstMsg.SetVisibility(visibility);
        dstMsg.SetVPositionShows(vPositionShows);
        dstMsg.SetShows(shows);
        writer.AddRow(dstMsg, QueryVisibilityOutputTag);
    }
};
REGISTER_REDUCER(TCalculateQueryVisibilityReducer)

// "Domain", "RegionId", "GroupId", "IsMobile"
struct TCalculateDomainVisibilityReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        double sumVisibility = 0;
        int64_t total = 0;
        TRivalByVisibilityPart dstMsg;
        for (auto &row: reader.GetRows(QueryVisibilityInputTag)) {
            dstMsg.SetDomain(row.GetDomain());
            dstMsg.SetRegionId(row.GetRegionId());
            dstMsg.SetGroupId(row.GetGroupId());
            dstMsg.SetIsMobile(row.GetIsMobile());
            sumVisibility += row.GetVisibility();
            total += 1;
        }
        dstMsg.SetGroupPopularity(total);
        dstMsg.SetRivalPopularity(-sumVisibility);
        writer.AddRow(dstMsg, RivalByVisibilityPartOutputTag);
    }
};
REGISTER_REDUCER(TCalculateDomainVisibilityReducer)

// "RegionId", "GroupId", "IsMobile"
struct TLeaveOnlyTopRivalsReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        int64_t cnt = 0;
        for (auto &row: reader.GetRows(RivalByVisibilityPartInputTag)) {
            cnt += 1;
            if (cnt <= RIVALS_THRESHOLD) {
                writer.AddRow(row, RivalByVisibilityPartOutputTag);
            } else {
                return;
            }
        }
    }
};
REGISTER_REDUCER(TLeaveOnlyTopRivalsReducer)

// "RegionId", "GroupId", "IsMobile"
struct TSetRivalsReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TDeque<TDomainToVisibility> rivals;
        for (auto &row: reader.GetRows(TmpRivalByVisibilityPartInputTag)) {
            bool found = false;
            for (auto &x: rivals) {
                if (x.domain == row.GetDomain()) {
                    found = true;
                }
            }
            if (!found) {
                rivals.push_back({row.GetDomain(), row.GetGroupPopularity(), row.GetRivalPopularity()});
            }
        }
        for (auto row: reader.GetRows(RivalByVisibilityPartInputTag)) {
            if (row.GetRivalPopularity() < 0) {
                row.SetRivalPopularity(-row.GetRivalPopularity());
            }
            if (row.GetGroupPopularity() < 0) {
                row.SetGroupPopularity(-row.GetGroupPopularity());
            }
            writer.AddRow(row, RivalByVisibilityPartOutputTag);
            for (auto &[rival, groupPopularity, rivalPopularity]: rivals) {
                if (rival == row.GetDomain()) {
                    continue;
                } else {
                    row.SetRival(rival);
                    row.SetRivalPopularity(rivalPopularity);
                    row.SetGroupPopularity(groupPopularity);
                    if (row.GetRivalPopularity() < 0) {
                        row.SetRivalPopularity(-row.GetRivalPopularity());
                    }
                    if (row.GetGroupPopularity() < 0) {
                        row.SetGroupPopularity(-row.GetGroupPopularity());
                    }
                    writer.AddRow(row, RivalByVisibilityPartOutputTag);
                }
            }
        }
    }
private:
    struct TDomainToVisibility {
        TString domain;
        double groupPopularity;
        double domainPopularity;
    };
};
REGISTER_REDUCER(TSetRivalsReducer)

// "Domain", "GroupId", "Url", "Query"
struct TCalculateAverageWithoutRegionIdReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TQueryPart dstMsg;
        int64_t askedCount = 0;
        double position = 0;
        int64_t totalCount = 0;
        int64_t clicks = 0;
        for (auto &row: reader.GetRows(QueryInfoInputTag)) {
            dstMsg.SetDomain(row.GetDomain());
            dstMsg.SetQuery(row.GetQuery());
            dstMsg.SetGroupId(row.GetGroupId());
            dstMsg.SetLandingPage(row.GetUrl());
            dstMsg.SetRegionId(row.GetRegionId());
            if (row.GetPosition() >= 0) {
                totalCount += 1;
                askedCount = row.GetQueryShows();
                position += row.GetPosition();
                clicks += row.GetClicks();
            }
        }
        if (totalCount > 0) {
            position /= totalCount;
        } else {
            position = -1;
        }
        dstMsg.SetAskedCount(askedCount);
        dstMsg.SetClicks(clicks);
        dstMsg.SetAveragePosition(position);
        writer.AddRow(dstMsg, QueryPartOutputTag);
    }
};
REGISTER_REDUCER(TCalculateAverageWithoutRegionIdReducer)

struct TGetUrlsForWebmasterSimpleProblemsMapper: public TTaggedMapper {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TQueryPart dstMsg;
        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(UrlForWebmasterRecordInputTag)) {
                dstMsg.SetLandingPage(GetUrl(row));
                dstMsg.ClearLandingProblems();
                if (IsNonCanonical(row)) {
                    dstMsg.AddLandingProblems(EUrlProblem::NON_CANONICAL);
                }
                if (IsDuplication(row)) {
                    dstMsg.AddLandingProblems(EUrlProblem::DUPLICATION);
                }
                if (IsBadHttpResponse(row)) {
                    dstMsg.AddLandingProblems(EUrlProblem::BAD_RESPONSE);
                }
                if (!dstMsg.GetLandingProblems().empty()){
                    writer.AddRow(dstMsg, QueryPartOutputTag);
                }
            }
            for (auto &row: reader.GetRows(ContentAttrsInputTag)) {
                dstMsg.SetLandingPage(GetUrl(row));
                dstMsg.ClearLandingProblems();
                if (!row.HasTitle() || row.GetTitle().empty()) {
                    dstMsg.AddLandingProblems(EUrlProblem::EMPTY_TITLE);
                }
                if (!row.HasMetaDescription() || row.GetMetaDescription().empty()) {
                    dstMsg.AddLandingProblems(EUrlProblem::EMPTY_META_DESCRIPTION);
                }
                if (!dstMsg.GetLandingProblems().empty()){
                    writer.AddRow(dstMsg, QueryPartOutputTag);
                }
            }
            for (auto &row: reader.GetRows(TitleHostContentAttrStatisticsInputTag)) {
                for (auto &x: row.GetSamples().GetPageSamples()) {
                    dstMsg.SetLandingPage(row.GetHost() + x.GetPath());
                    dstMsg.ClearLandingProblems();
                    dstMsg.AddLandingProblems(EUrlProblem::DUPLICATE_TITLE);
                    writer.AddRow(dstMsg, QueryPartOutputTag);
                }
            }
            for (auto &row: reader.GetRows(DescrHostContentAttrStatisticsInputTag)) {
                for (auto &x: row.GetSamples().GetPageSamples()) {
                    dstMsg.SetLandingPage(row.GetHost() + x.GetPath());
                    dstMsg.ClearLandingProblems();
                    dstMsg.AddLandingProblems(EUrlProblem::DUPLICATE_META_DESCRIPTION);
                    writer.AddRow(dstMsg, QueryPartOutputTag);
                }
            }
        }
    }
private:
    const THashSet<int32_t> GoodHttpCodes = { 200, 301, 302 };

    template <class T>
    TString GetUrl(const T &row) {
        return row.GetHost() + row.GetPath();
    }

    bool IsNonCanonical(const TAcceptanceUrlForWebMasterRecord &row) {
        return row.HasRelCanonicalTarget() && row.GetRelCanonicalTarget() != GetUrl(row);
    }

    bool IsDuplication(const TAcceptanceUrlForWebMasterRecord &row) {
        return GetUrl(row) != row.GetMainHost() + row.GetMainPath();
    }

    bool IsBadHttpResponse(const TAcceptanceUrlForWebMasterRecord &row) {
        return !GoodHttpCodes.contains(row.GetHttpCode());
    }
};
REGISTER_MAPPER(TGetUrlsForWebmasterSimpleProblemsMapper)

// "LandingPage"
struct TMakeUniqueReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TQueryPart dstMsg;
        THashSet<int64_t> problems;
        for (auto &row: reader.GetRows(QueryPartInputTag)) {
            dstMsg.SetLandingPage(row.GetLandingPage());
            for (auto &problem: row.GetLandingProblems()) {
                problems.insert(static_cast<int64_t>(problem));
            }
        }
        for (auto &problem: problems) {
            dstMsg.AddLandingProblems(static_cast<EUrlProblem>(problem));
        }
        writer.AddRow(dstMsg, QueryPartOutputTag);
    }
};
REGISTER_REDUCER(TMakeUniqueReducer)

// "LandingPage"
struct TAddProblemsReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        THashSet<int64_t> problems;
        bool visited = false;
        for (auto &row: reader.GetRows(TmpQueryPartInputTag)) {
            if (!visited) {
                visited = true;
            } else {
                Y_ASSERT(false);
            }
            for (auto &problem: row.GetLandingProblems()) {
                problems.insert(static_cast<int64_t>(problem));
            }
        }
        for (auto row: reader.GetRows(QueryPartInputTag)) {
            for (auto &problem: problems) {
                row.AddLandingProblems(static_cast<EUrlProblem>(problem));
            }
            writer.AddRow(row, QueryPartOutputTag);
        }
    }
};
REGISTER_REDUCER(TAddProblemsReducer)

// "Query", "Domain"
struct TAddQueriesWithoutLandingsReducer1: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        bool hasLanding = false;
        for (auto &row: reader.GetRows(QueryPartInputTag)) {
            Y_UNUSED(row);
            hasLanding = true;
        }
        TQueryPart dstMsg;
        for (auto &row: reader.GetRows(NicheHostQuerySampledInputTag)) {
            if (!hasLanding) {
                dstMsg.SetGroupId(NEW_MARKETS_GROUP_ID_PREFIX + ":" + row.GetDomain());
                dstMsg.SetDomain(row.GetDomain());
                dstMsg.SetQuery(row.GetTopCorrectedQuery());
                writer.AddRow(dstMsg, QueryPartOutputTag);
            }
        }
    }
};
REGISTER_REDUCER(TAddQueriesWithoutLandingsReducer1)

// "Query", "Domain"
struct TAddQueriesWithoutLandingsReducer2: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        bool hasLanding = false;
        for (auto &row: reader.GetRows(QueryPartInputTag)) {
            Y_UNUSED(row);
            hasLanding = true;
        }
        TQueryPart dstMsg;
        for (auto &row: reader.GetRows(GroupIdToQueryInputTag)) {
            if (!hasLanding) {
                dstMsg.SetGroupId(row.GetGroupId() + ":" + row.GetDomain());
                dstMsg.SetQuery(row.GetQuery());
                dstMsg.SetDomain(row.GetDomain());
                writer.AddRow(dstMsg, QueryPartOutputTag);
            }
        }
    }
};
REGISTER_REDUCER(TAddQueriesWithoutLandingsReducer2)

// "Domain", "GroupId", "LandingPage"
// Passes through the first QUERIES_PER_DOMAIN_THRESHOLD rows of each reduce group,
// in reader order, and drops the rest of the group.
struct TFilterQueriesOfUrlReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        int seen = 0;
        for (auto &part: reader.GetRows(QueryPartInputTag)) {
            if (++seen > QUERIES_PER_DOMAIN_THRESHOLD) {
                return;
            }
            writer.AddRow(part, QueryPartOutputTag);
        }
    }
};
REGISTER_REDUCER(TFilterQueriesOfUrlReducer)

// "Domain", "GroupId", "IsMobile", "RivalType", "RegionId"
struct TSumUpVisibilitiesReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        double rivalPop = 0;
        double groupPop = 0;
        TRivalReportRow dstMsg;
        TSet<std::pair<double, TString>> rivals;
        for (auto &row: reader.GetRows(RivalByVisibilityPartInputTag)) {
            dstMsg.SetDomain(row.GetDomain());
            dstMsg.SetGroupId(row.GetGroupId());
            dstMsg.SetIsMobile(row.GetIsMobile());
            dstMsg.SetRivalType(row.GetRivalType());
            dstMsg.SetRegionId(row.GetRegionId());
            rivals.insert({-row.GetRivalPopularity(), row.GetRival()});
            rivalPop += row.GetRivalPopularity();
            groupPop += row.GetGroupPopularity();
        }
        int cnt = 0;
        for (auto &[_, rival]: rivals) {
            if (rival == dstMsg.GetDomain()) {
                continue;
            }
            dstMsg.AddRivals(rival);
            cnt += 1;
            if (cnt >= RIVALS_IN_REPORT_UPPER_BOUND) {
                break;
            }
        }
        dstMsg.SetGroupPopularity(groupPop);
        dstMsg.SetRivalPopularity(rivalPop);
        writer.AddRow(dstMsg, RivalReportRowOutputTag);
    }
};
REGISTER_REDUCER(TSumUpVisibilitiesReducer)

struct TFilterAllDevStuffMapper: public TTaggedMapper {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        for (auto &row: reader.GetRows(QueryPartInputTag)) {
            bool needToWrite = true;
            TString groupId = row.GetGroupId();
            needToWrite &= groupId.EndsWith(":" + row.GetDomain());
            if (needToWrite) {
                writer.AddRow(row, QueryPartOutputTag);
            }
        }
    }
};
REGISTER_MAPPER(TFilterAllDevStuffMapper)

struct TFilterRivalsDevStuffMapper: public TTaggedMapper {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        for (auto row: reader.GetRows(RivalByVisibilityPartInputTag)) {
            bool needToWrite = true;
            TString groupId = row.GetGroupId();
            needToWrite &= groupId.EndsWith(":" + row.GetDomain());
            if (needToWrite) {
                writer.AddRow(row, RivalByVisibilityPartOutputTag);
            }
        }
    }
};
REGISTER_MAPPER(TFilterRivalsDevStuffMapper)

// Merges the main and the "tmp" TQueryPart inputs into one output, setting RegionId
// to 0 on every row that does not have a RegionId.
struct TAddQueriesWithoutLandingsMapper: public TTaggedMapper {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        // Fills in the default region where it is missing and writes the row.
        const auto emitWithRegion = [&](auto &part) {
            if (!part.HasRegionId()) {
                part.SetRegionId(0);
            }
            writer.AddRow(part, QueryPartOutputTag);
        };
        while (reader.IsValid()) {
            // Rows are taken by value because they may be modified before writing.
            for (auto part: reader.GetRows(QueryPartInputTag)) {
                emitWithRegion(part);
            }
            for (auto part: reader.GetRows(TmpQueryPartInputTag)) {
                emitWithRegion(part);
            }
        }
    }
};
REGISTER_MAPPER(TAddQueriesWithoutLandingsMapper)

// Splits TQueryPart rows into two outputs: rows that have a LandingPage go to the main
// output, rows without one go to the "tmp" output.
struct TDivideBasedOnLandingPageMapper: public TTaggedMapper {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        while (reader.IsValid()) {
            for (auto &part: reader.GetRows(QueryPartInputTag)) {
                if (!part.HasLandingPage()) {
                    writer.AddRow(part, TmpQueryPartOutputTag);
                    continue;
                }
                writer.AddRow(part, QueryPartOutputTag);
            }
        }
    }
};
REGISTER_MAPPER(TDivideBasedOnLandingPageMapper)

struct TFilterNonWebmasterHostsRivalsMapper: public TTaggedMapper {
public:
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, WebmasterHosts);
        TTaggedMapper::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, WebmasterHosts);
        TTaggedMapper::Load(stream);
    }

    TFilterNonWebmasterHostsRivalsMapper(const THashSet<TString> &webmasterHosts): WebmasterHosts(webmasterHosts) {}

    TFilterNonWebmasterHostsRivalsMapper() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        while (reader.IsValid()) {
            for (auto row: reader.GetRows(RivalByVisibilityPartInputTag)) {
                if (WebmasterHosts.contains(row.GetDomain())) {
                    TString groupId = row.GetGroupId();
                    groupId = groupId.substr(0, groupId.find(':'));
                    row.SetGroupId(groupId);
                    writer.AddRow(row, RivalByVisibilityPartOutputTag);
                }
            }
        }
    }
private:
    THashSet<TString> WebmasterHosts;
};
REGISTER_MAPPER(TFilterNonWebmasterHostsRivalsMapper)

struct TFilterNonWebmasterHostsMapper: public TTaggedMapper {
public:
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, WebmasterHosts);
        TTaggedMapper::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, WebmasterHosts);
        TTaggedMapper::Load(stream);
    }

    TFilterNonWebmasterHostsMapper(const THashSet<TString> &webmasterHosts): WebmasterHosts(webmasterHosts) {}

    TFilterNonWebmasterHostsMapper() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        while (reader.IsValid()) {
            for (auto row: reader.GetRows(QueryInfoInputTag)) {
                if (WebmasterHosts.contains(row.GetDomain())) {
                    TString domain = row.GetGroupId();
                    domain = domain.substr(domain.find(':') + 1);
                    if (WebmasterHosts.contains(domain)) {
                        writer.AddRow(row, QueryInfoOutputTag);
                    }
                }
            }
        }
    }
private:
    THashSet<TString> WebmasterHosts;
};
REGISTER_MAPPER(TFilterNonWebmasterHostsMapper)

struct TFilterNonWebmasterHostsQueriesMapper: public TTaggedMapper {
public:
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, WebmasterHosts);
        TTaggedMapper::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, WebmasterHosts);
        TTaggedMapper::Load(stream);
    }

    TFilterNonWebmasterHostsQueriesMapper(const THashSet<TString> &webmasterHosts): WebmasterHosts(webmasterHosts) {}

    TFilterNonWebmasterHostsQueriesMapper() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        while (reader.IsValid()) {
            for (auto row: reader.GetRows(QueryPartInputTag)) {
                if (WebmasterHosts.contains(row.GetDomain())) {
                    TString groupId = row.GetGroupId();
                    groupId = groupId.substr(0, groupId.find(':'));
                    row.SetGroupId(groupId);
                    writer.AddRow(row, QueryPartOutputTag);
                }
            }
        }
    }
private:
    THashSet<TString> WebmasterHosts;
};
REGISTER_MAPPER(TFilterNonWebmasterHostsQueriesMapper)

struct TFilterPornQueriesMapper: public TTaggedMapper {
public:
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, CpQueries);
        ::Save(&stream, PornQueries);
        TTaggedMapper::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, CpQueries);
        ::Load(&stream, PornQueries);
        TTaggedMapper::Load(stream);
    }

    TFilterPornQueriesMapper(const THashSet<int64_t> &cpQueries, const THashSet<int64_t> &pornQueries):
        CpQueries(cpQueries), PornQueries(pornQueries) {}

    TFilterPornQueriesMapper() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(QueryInfoInputTag)) {
                if (!CpQueries.contains(FnvHash<int64_t>(row.GetQuery()))) {
                    writer.AddRow(row, QueryInfoOutputTag);
                }
            }
            for (auto &row: reader.GetRows(NicheHostQuerySampledInputTag)) {
                if (!PornQueries.contains(FnvHash<int64_t>(row.GetTopCorrectedQuery())) &&
                    !CpQueries.contains(FnvHash<int64_t>(row.GetTopCorrectedQuery()))) {
                    writer.AddRow(row, NicheHostQuerySampledOutputTag);
                }
            }
        }
    }
private:
    THashSet<int64_t> CpQueries;
    THashSet<int64_t> PornQueries;
};
REGISTER_MAPPER(TFilterPornQueriesMapper)

// Mapper that fills in RivalType on every rival row:
//  - a row without a Rival gets its own Domain as Rival;
//  - when Rival equals Domain the type is RADAR_TYPE_ITSELF;
//  - otherwise the type is looked up in DomainToType, falling back to RADAR_TYPE_UNDEF
//    for rivals that are not in the map.
// The domain-to-type map is part of the serialized mapper state (see Save/Load).
struct TAddTypesToRivalsMapper: public TTaggedMapper {
public:
    // Writes the map first, then the base-class state; Load reads in the same order.
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, DomainToType);
        TTaggedMapper::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, DomainToType);
        TTaggedMapper::Load(stream);
    }

    TAddTypesToRivalsMapper(const THashMap<TString, TString> &domainToType):
        DomainToType(domainToType) {}

    TAddTypesToRivalsMapper() = default;

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        while (reader.IsValid()) {
            // Taken by value: Rival and RivalType are set before the row is written.
            for (auto row: reader.GetRows(RivalByVisibilityPartInputTag)) {
                if (!row.HasRival()) {
                    row.SetRival(row.GetDomain());
                }
                if (row.GetDomain() == row.GetRival()) {
                    row.SetRivalType(RADAR_TYPE_ITSELF);
                } else {
                    // One find() instead of contains() + operator[]: the key is hashed
                    // once and the inserting accessor is not used for a read-only lookup.
                    const auto typeIt = DomainToType.find(row.GetRival());
                    if (typeIt != DomainToType.end()) {
                        row.SetRivalType(typeIt->second);
                    } else {
                        row.SetRivalType(RADAR_TYPE_UNDEF);
                    }
                }
                writer.AddRow(row, RivalByVisibilityPartOutputTag);
            }
        }
    }
private:
    THashMap<TString, TString> DomainToType;
};
REGISTER_MAPPER(TAddTypesToRivalsMapper)

// For every QueryInfo row whose Segment is E_USER_SESSION, emits two TQueryToUrlWPos
// rows with the same Query, Url and Clicks: one with the row's own RegionId and one
// with RegionId 0. Rows of other segments are skipped.
struct TGetQueryToPageMapper: public TTaggedMapper {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TQueryToUrlWPos dstMsg;
        // Input rows are only read, so bind them by const reference instead of copying
        // a whole message per iteration.
        for (const auto &row: reader.GetRows(QueryInfoInputTag)) {
            if (row.GetSegment() != E_USER_SESSION) {
                continue;
            }
            dstMsg.SetQuery(row.GetQuery());
            dstMsg.SetUrl(row.GetUrl());
            dstMsg.SetClicks(row.GetClicks());
            dstMsg.SetRegionId(row.GetRegionId());
            writer.AddRow(dstMsg, QueryToUrlWPosOutputTag);
            // Second copy of the same record under region 0.
            dstMsg.SetRegionId(0);
            writer.AddRow(dstMsg, QueryToUrlWPosOutputTag);
        }
    }
};
REGISTER_MAPPER(TGetQueryToPageMapper)

// "Query"
struct TSetSimilarPagesReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TString similarUrls;
        for (auto &row: reader.GetRows(QueryToUrlInputTag)) {
            similarUrls = row.GetUrls();
        }
        for (auto row: reader.GetRows(QueryPartInputTag)) {
            row.SetSimilarPage(similarUrls);
            writer.AddRow(row, QueryPartOutputTag);
        }
    }
};
REGISTER_REDUCER(TSetSimilarPagesReducer)

// "Query", "Domain", "Rival"
struct TRemoveRivalsReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TQueryPart dstMsg;
        for (auto &row: reader.GetRows(QueryPartInputTag)) {
            dstMsg.ClearLandingProblems();
            dstMsg.SetDomain(row.GetDomain());
            dstMsg.SetGroupId(row.GetGroupId());
            dstMsg.SetQuery(row.GetQuery());
            dstMsg.SetAskedCount(row.GetAskedCount());
            dstMsg.SetAveragePosition(row.GetAveragePosition());
            dstMsg.SetClicks(row.GetClicks());
            dstMsg.SetLandingPage(row.GetLandingPage());
            for (auto &problem: row.GetLandingProblems()) {
                dstMsg.AddLandingProblems(static_cast<EUrlProblem>(problem));
            }
            dstMsg.SetSimilarPage(row.GetSimilarPage());
            writer.AddRow(dstMsg, QueryPartOutputTag);
            break;
        }
    }
};
REGISTER_REDUCER(TRemoveRivalsReducer)

// "Query"
struct TAddShowsReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        int64_t shows = 0;
        for (auto &row: reader.GetRows(WordstatInputTag)) {
            shows = row.GetQueryShows();
        }
        for (auto row: reader.GetRows(QueryPartInputTag)) {
            row.SetAskedCount(shows);
            writer.AddRow(row, QueryPartOutputTag);
        }
    }
};
REGISTER_REDUCER(TAddShowsReducer)

// "Query", "Domain"
// Converts every TQueryPart row of a reduce group into a TQueryPartWWeight row.
//  - Scalar fields and landing problems are copied as-is.
//  - SimilarPage is parsed with ParseSimilarUrls and only the URLs that do not contain
//    the row's Domain are added to SimilarPages.
//  - Weight is set (from the last sampled niche row of the group, 0 when there is none)
//    only on rows whose GroupId equals NEW_MARKETS_GROUP_ID_PREFIX.
struct TAddWeightForNewMarketReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TQueryPartWWeight dstMsg;
        float weight = 0;
        for (auto &row: reader.GetRows(NicheHostQuerySampledInputTag)) {
            weight = row.GetWeight();
            // NOTE(review): every sampled row is printed to stderr; this looks like
            // leftover debug output - confirm whether it is still needed.
            Cerr << row.GetTopCorrectedQuery() << " " << row.GetDomain() << Endl;
        }
        for (auto &row: reader.GetRows(QueryPartInputTag)) {
            // The message is reused between rows, so reset it completely. Clearing only
            // the repeated fields would let a Weight set for a new-markets row leak into
            // later rows of the same group that belong to another GroupId.
            dstMsg.Clear();
            dstMsg.SetDomain(row.GetDomain());
            dstMsg.SetGroupId(row.GetGroupId());
            dstMsg.SetQuery(row.GetQuery());
            dstMsg.SetAskedCount(row.GetAskedCount());
            dstMsg.SetAveragePosition(row.GetAveragePosition());
            dstMsg.SetClicks(row.GetClicks());
            dstMsg.SetLandingPage(row.GetLandingPage());
            for (auto &problem: row.GetLandingProblems()) {
                dstMsg.AddLandingProblems(static_cast<EUrlProblem>(problem));
            }
            auto similarPages = ParseSimilarUrls(row.GetSimilarPage());
            for (auto &similarPage: similarPages) {
                // Skip pages whose URL contains the row's own domain.
                if (!similarPage.Contains(row.GetDomain())) {
                    dstMsg.AddSimilarPages(similarPage);
                }
            }
            if (row.GetGroupId() == NEW_MARKETS_GROUP_ID_PREFIX) {
                dstMsg.SetWeight(weight);
            }
            writer.AddRow(dstMsg, QueryPartWWeightOutputTag);
        }
    }
};
REGISTER_REDUCER(TAddWeightForNewMarketReducer)

// "Query"
// Deduplicating reducer: forwards the first QueryInfo row it obtains for a reduce group
// and returns immediately, so at most one row is written per call.
struct TMakeQueriesUniqueReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        while (reader.IsValid()) {
            for (auto &row: reader.GetRows(QueryInfoInputTag)) {
                writer.AddRow(row, QueryInfoOutputTag);
                // The first row represents the whole group; the rest are ignored.
                return;
            }
        }
    }
};
REGISTER_REDUCER(TMakeQueriesUniqueReducer)

void PrepareRivalsBasedOnVisibility(NYT::ITransactionPtr &tx,
                                        TString date,
                                        const THashMap<TString, TString> &radarTypes,
                                        bool needToGetQueries) {
    NComputeGraph::TJobRunner runner;
    const auto& cfg = TConfig::CInstance();

    TString userSessionsTableName = NYTUtils::JoinPath(cfg.TABLE_USER_SESSIONS_DAILY, date);
    TString prsLogTableName = NYTUtils::JoinPath(cfg.TABLE_PRS_LOG_DAILY, date);
    TString pornoQueriesTable = NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "porno_queries");
    TString resultTableName = NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, date);
    TString newMarketTableName = NYTUtils::JoinPath(cfg.TABLE_NICHE_CALCULATED, date);
    TString domain2region2shows = NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "domain2region2shows");
    TString contentAttsTableName = GetJupiterContentAttrsInProdTable(tx);
    TString urlsForWebmasterSimpleTableName = GetJupiterAcceptanceInProdTable(tx);
    TString titleProblemsTableName = "//home/webmaster/prod/stage/export/hostinfo/titles";
    TString descriptionsProblemsTableName = "//home/webmaster/prod/stage/export/hostinfo/descriptions";

    THashSet<int64_t> pornQueriesHashes;
    THashSet<int64_t> cpQueriesHashes;

    auto preparePornoQueries = runner.AddJob([&]() {
        LOG_INFO("start mining porno queries");
        TMapCombineReduceCmd<TGetPornQueriesMapper, TMakeQueriesUniqueReducer, TMakeQueriesUniqueReducer>(tx)
            .Input(TTable<NUserSessions::NProto::TQuery>(tx, DebugPath(userSessionsTableName)), UserSessionInputTag)
            .Input(TTable<NUserSessions::NProto::TQuery>(tx, DebugPath(prsLogTableName)), PrsInputTag)
            .IntermediateMapTag(QueryInfoOutputTag)
            .IntermediateCombineInputTag(QueryInfoInputTag)
            .IntermediateCombineOutputTag(QueryInfoOutputTag)
            .IntermediateReduceTag(QueryInfoInputTag)
            .ReduceBy({"Query", "IsMobile"})
            .Output(TTable<TQueryInfo>(tx, pornoQueriesTable), QueryInfoOutputTag)
            .Do();
        auto reader = tx->CreateTableReader<TQueryInfo>(pornoQueriesTable);
        for (; reader->IsValid(); reader->Next()) {
            auto &row = reader->GetRow();
            auto query = row.GetQuery();
            auto hashedQuery = FnvHash<int64_t>(query);
            if (row.GetIsMobile()) {
                cpQueriesHashes.insert(hashedQuery);
            } else {
                pornQueriesHashes.insert(hashedQuery);
            }
        }
        LOG_INFO("cp queries size: %lu, porn queries size: %lu", cpQueriesHashes.size(), pornQueriesHashes.size());
        LOG_INFO("finish mining porno queries");
    });

    auto prepareData = runner.AddJob([&]() {
        LOG_INFO("start preparing data");

        TMapCmd<TRoundUpTheRegionQueriesMapper>(tx)
            .Input(TTable<NUserSessions::NProto::TQuery>(tx, DebugPath(userSessionsTableName)), UserSessionInputTag)
            .Input(TTable<NUserSessions::NProto::TQuery>(tx, DebugPath(prsLogTableName)), PrsInputTag)
            .Output(TTable<TQueryInfo>(tx, resultTableName), QueryInfoOutputTag)
            .Output(TTable<TQueryToUID>(tx, resultTableName + "_uids"), QueryToUIDOutputTag)
            .AddLocalFile(TConfig::GEOBASE_FILE_LITE)
            .Do();

        LOG_INFO("finish preparing data");
    });

    auto preparedQueries = runner.AddJob([&]() {
        LOG_INFO("start preparing queries");

        TCombineReduceCmd<TLeaveUniqueReducer, TLeaveUniqueReducer>(tx)
            .Input(TTable<TQueryToUID>(tx, resultTableName + "_uids"), QueryToUIDInputTag)
            .Output(TTable<TQueryToUID>(tx, resultTableName + "_uids"), QueryToUIDOutputTag)
            .IntermediateCombineInputTag(QueryToUIDInputTag)
            .IntermediateCombineOutputTag(QueryToUIDOutputTag)
            .IntermediateReduceTag(QueryToUIDInputTag)
            .ReduceBy({"Query", "UID"})
            .Do();

        TMapReduceCmd<void, TGetDomainsUIDCountReducer>(tx)
            .Input(TTable<TQueryToUID>(tx, resultTableName + "_uids"), QueryToUIDInputTag)
            .IntermediateMapTag(QueryToUIDOutputTag)
            .IntermediateReduceTag(QueryToUIDInputTag)
            .Output(TTable<TQueryToUIDCount>(tx, resultTableName + "_uids_counts"), QueryToUIDCountOutputTag)
            .ReduceBy({"Query"})
            .Do();

        DoParallel(
            TSortCmd<TQueryInfo>(tx)
                .Input<TQueryInfo>(resultTableName)
                .Output<TQueryInfo>(resultTableName)
                .By({"Query"}),
            TSortCmd<TQueryToUIDCount>(tx)
                .Input<TQueryToUIDCount>(resultTableName + "_uids_counts")
                .Output<TQueryToUIDCount>(resultTableName + "_uids_counts")
                .By({"Query"})
        );

        TReduceCmd<TFilterQueriesReducer>(tx)
            .Input(TTable<TQueryToUIDCount>(tx, resultTableName + "_uids_counts"), QueryToUIDCountInputTag)
            .Input(TTable<TQueryInfo>(tx, resultTableName), QueryInfoInputTag)
            .Output(TTable<TQueryInfo>(tx, resultTableName), QueryInfoOutputTag)
            .ReduceBy({"Query"})
            .Do();

        TSortCmd<TQueryInfo>(tx)
            .Input<TQueryInfo>(resultTableName)
            .Output<TQueryInfo>(resultTableName)
            .By({"Domain", "Url", "RegionId", "Query", "IsMobile", "Segment"})
            .Do();

        TCombineReduceCmd<TCalculateAverageReducer, TCalculateAverageReducer>(tx)
            .Input(TTable<TQueryInfo>(tx, resultTableName), QueryInfoInputTag)
            .Output(TTable<TQueryInfo>(tx, resultTableName), QueryInfoOutputTag)
            .IntermediateCombineInputTag(QueryInfoInputTag)
            .IntermediateCombineOutputTag(QueryInfoOutputTag)
            .IntermediateReduceTag(QueryInfoInputTag)
            .ReduceBy({"Domain", "Url", "RegionId", "Query", "IsMobile", "Segment"})
            .Do();

        LOG_INFO("finish preparing queries");
    }, { prepareData });

    auto filteredPorn = runner.AddJob([&]() {
        LOG_INFO("start filtering porno queries");

        TMapCmd<TFilterPornQueriesMapper>(tx, new TFilterPornQueriesMapper(cpQueriesHashes, pornQueriesHashes))
            .Input(TTable<TQueryInfo>(tx, resultTableName), QueryInfoInputTag)
            .Input(TTable<TNicheHostQuerySampled>(tx, newMarketTableName), NicheHostQuerySampledInputTag)
            .Output(TTable<TQueryInfo>(tx, resultTableName), QueryInfoOutputTag)
            .Output(TTable<TNicheHostQuerySampled>(tx, newMarketTableName), NicheHostQuerySampledOutputTag)
            .MemoryLimit(2_GBs)
            .Do();

        DoParallel(
            TSortCmd<TQueryInfo>(tx)
                .Input<TQueryInfo>(resultTableName)
                .Output<TQueryInfo>(resultTableName)
                .By({"Domain"}),
            TSortCmd<TNicheHostQuerySampled>(tx)
                .Input<TNicheHostQuerySampled>(newMarketTableName)
                .Output<TNicheHostQuerySampled>(newMarketTableName)
                .By({"Domain"})
        );

        LOG_INFO("finish filtering porno queries");
    }, { preparedQueries, preparePornoQueries });

    auto prepareSimilarPages = runner.AddJob([&](){
        LOG_INFO("start getting similar pages");

        TMapReduceCmd<TGetQueryToPageMapper, TAveragePositionReducer>(tx)
            .Input(TTable<TQueryInfo>(tx, resultTableName), QueryInfoInputTag)
            .Output(TTable<TQueryToUrlWPos>(tx, resultTableName + "_similar_pages"), QueryToUrlWPosOutputTag)
            .IntermediateMapTag(QueryToUrlWPosOutputTag)
            .IntermediateReduceTag(QueryToUrlWPosInputTag)
            .ReduceBy({"Query", "Url", "RegionId"})
            .Do();

        TSortCmd<TQueryToUrlWPos>(tx)
            .Input<TQueryToUrlWPos>(resultTableName + "_similar_pages")
            .Output<TQueryToUrlWPos>(resultTableName + "_similar_pages")
            .By({"Query", "RegionId", "Clicks"})
            .Do();

        TReduceCmd<TLeaveTopUrlReducer>(tx)
            .Input(TTable<TQueryToUrlWPos>(tx, resultTableName + "_similar_pages"), QueryToUrlWPosInputTag)
            .Output(TTable<TQueryToUrl>(tx, resultTableName + "_similar_pages"), QueryToUrlOutputTag)
            .ReduceBy({"Query", "RegionId"})
            .SortBy({"Query", "RegionId", "Clicks"})
            .Do();

        TSortCmd<TQueryToUrl>(tx)
            .Input<TQueryToUrl>(resultTableName + "_similar_pages")
            .Output<TQueryToUrl>(resultTableName + "_similar_pages")
            .By({"Query", "RegionId"})
            .Do();
        LOG_INFO("finish getting similar pages");
    }, { filteredPorn });

    auto prepareRegionWeight = runner.AddJob([&] {
        TMapReduceCmd<void, TSumUpShowsReducer>(tx)
            .Input(TTable<TQueryInfo>(tx, resultTableName), QueryInfoInputTag)
            .Output(TTable<TDomainRegionShows>(tx, domain2region2shows), DomainRegionShowsOutputTag)
            .IntermediateMapTag(QueryInfoOutputTag)
            .IntermediateReduceTag(QueryInfoInputTag)
            .ReduceBy({"Domain", "RegionId"})
            .Do();

        TSortCmd<TDomainRegionShows>(tx)
            .Input<TDomainRegionShows>(domain2region2shows)
            .Output<TDomainRegionShows>(domain2region2shows)
            .By({"Domain"})
            .Do();

        TReduceCmd<TCalculateWeightReducer>(tx)
            .Input(TTable<TDomainRegionShows>(tx, domain2region2shows), TmpDomainRegionShowsInputTag)
            .Input(TTable<TDomainRegionShows>(tx, domain2region2shows), DomainRegionShowsInputTag)
            .Output(TTable<TDomainRegionShows>(tx, domain2region2shows + "_weighted"), DomainRegionShowsOutputTag)
            .ReduceBy({"Domain"})
            .Do();

        TSortCmd<TDomainRegionShows>(tx)
            .Input<TDomainRegionShows>(domain2region2shows + "_weighted")
            .Output<TDomainRegionShows>(domain2region2shows + "_weighted")
            .By({"Domain", "RegionId"})
            .Do();
    }, { filteredPorn });

    // first find q.shows
    auto getShows = runner.AddJob([&] {
        TMapCombineReduceCmd<TCollectAllQueriesMapper, TQueryShowsAddReducer, TQueryShowsAddReducer>(tx)
            .Input(TTable<TQueryInfo>(tx, resultTableName), QueryInfoInputTag)
            .Input(TTable<TGroupIdToQuery>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "export_groups")), GroupIdToQueryInputTag)
            .IntermediateMapTag(WordstatOutputTag)
            .IntermediateCombineInputTag(WordstatInputTag)
            .IntermediateCombineOutputTag(WordstatOutputTag)
            .IntermediateReduceTag(WordstatInputTag)
            .Output(TTable<TWordstat>(tx, resultTableName + "_ws"), WordstatOutputTag)
            .ReduceBy({"Query"})
            .Do();
    }, { filteredPorn });

    // add groups to queries:
    // if there is a (domain, group, query) triple, then groupid = hash(domain, group), and for every query we iterate through all groupids
    // there are also special groups: <<new markets>> and <<whole queries>>

    auto addGroups = runner.AddJob([&] {
        TMapCombineReduceCmd<TCollectAllGroupsMapper, TMakeUniqueQueryGroupReducer, TMakeUniqueQueryGroupReducer>(tx)
            .Input(TTable<TGroupIdToQuery>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "export_groups")), GroupIdToQueryInputTag)
            .IntermediateMapTag(GroupIdToQueryOutputTag)
            .IntermediateCombineInputTag(GroupIdToQueryInputTag)
            .IntermediateCombineOutputTag(GroupIdToQueryOutputTag)
            .IntermediateReduceTag(GroupIdToQueryInputTag)
            .Output(TTable<TGroupIdToQuery>(tx, resultTableName + "_gs"), GroupIdToQueryOutputTag)
            .ReduceBy({"Query", "GroupId"})
            .Do();
        TReduceCmd<TJoinGroupsAndShowsForNicheNewMarketsReducer>(tx)
            .Input(TTable<TNicheHostQuerySampled>(tx, newMarketTableName), NicheHostQuerySampledInputTag)
            .Output(TTable<TGroupIdToQuery>(tx, NYT::TRichYPath(resultTableName + "_gs").Append(true)), GroupIdToQueryOutputTag)
            .ReduceBy({"Domain"})
            .Do();
    }, { filteredPorn });


    auto sortedWordstat = runner.AddJob([&]() {
        TSortCmd<TWordstat>(tx)
            .Input<TWordstat>(resultTableName + "_ws")
            .Output<TWordstat>(resultTableName + "_ws")
            .By({"Query"})
            .Do();
    }, { getShows });

    auto sortedGroups = runner.AddJob([&]() {
        TSortCmd<TGroupIdToQuery>(tx)
            .Input<TGroupIdToQuery>(resultTableName + "_gs")
            .Output<TGroupIdToQuery>(resultTableName + "_gs")
            .By({"Query"})
            .Do();
    }, { addGroups });

    THashSet<TString> webmasterHosts;

    auto readWebmasterHosts = runner.AddJob([&]() {
        THashSet<TString> tmp;
        if (!NYTUtils::LoadWebmastersHosts(tx, cfg.TABLE_WEBMASTER_HOSTS, tmp, 4000000)) {
            ythrow yexception() << "there is problem with webmaster hosts table";
        }
        for (auto &url: tmp) {
            webmasterHosts.insert(HostUrlToDomain(url));
        }
    });

    auto joinGroupsAndShows = runner.AddJob([&] {
        LOG_INFO("start joining group ids and shows");
        // first, keep only the top:
        // build a map Domain -> number of records,
        // then reduce and keep the top TOP_QUERIES_PERCENT * count queries
        TReduceCmd<TGetDomainToCountReducer>(tx)
            .Input(TTable<TQueryInfo>(tx, resultTableName), QueryInfoInputTag)
            .Input(TTable<TQueryInfo>(tx, resultTableName), TmpQueryInfoInputTag)
            .Output(TTable<TQueryInfoWithCount>(tx, resultTableName + "_with_counts"), QueryInfoWithCountOutputTag)
            .ReduceBy({"Domain"})
            .Do();

        TMapCmd<TNegateShowsMapper>(tx)
            .Input(TTable<TQueryInfoWithCount>(tx, resultTableName + "_with_counts"), QueryInfoWithCountInputTag)
            .Output(TTable<TQueryInfoWithCount>(tx, resultTableName + "_with_counts"), QueryInfoWithCountOutputTag)
            .Do();

        TSortCmd<TQueryInfoWithCount>(tx)
            .Input<TQueryInfoWithCount>(resultTableName + "_with_counts")
            .Output<TQueryInfoWithCount>(resultTableName + "_with_counts")
            .By({"Domain", "Position", "Shows"})
            .Do();

        TReduceCmd<TGetTopDomainsQueriesReducer>(tx)
            .Input(TTable<TQueryInfoWithCount>(tx, resultTableName + "_with_counts"), QueryInfoWithCountInputTag)
            .Output(TTable<TQueryInfo>(tx, resultTableName + "_sampled"), QueryInfoOutputTag)
            .ReduceBy({"Domain"})
            .SortBy({"Domain", "Position", "Shows"})
            .Do();

        // now sample the top

        TMapReduceCmd<void, TJoinGroupsAndShowsForNicheReducer>(tx)
            .Input(TTable<TQueryInfo>(tx, resultTableName + "_sampled"), QueryInfoInputTag)
            .IntermediateMapTag(QueryInfoOutputTag)
            .IntermediateReduceTag(QueryInfoInputTag)
            .Output(TTable<TQueryInfo>(tx, resultTableName + "_selected_queries"), QueryInfoOutputTag)
            .ReduceBy({"Domain", "Position"})
            .Do();

        DoParallel(
             TSortCmd<TQueryInfo>(tx)
                .Input<TQueryInfo>(resultTableName + "_selected_queries")
                .Output<TQueryInfo>(resultTableName + "_selected_queries")
                .By({"Query"}),
             TSortCmd<TQueryInfo>(tx)
                .Input<TQueryInfo>(resultTableName)
                .Output<TQueryInfo>(resultTableName)
                .By({"Query"})
        );

        TReduceCmd<TJoinGroupsAndShowsReducer>(tx)
            .Input(TTable<TGroupIdToQuery>(tx, DebugPath(resultTableName + "_gs")), GroupIdToQueryInputTag)
            .Input(TTable<TWordstat>(tx, resultTableName + "_ws"), WordstatInputTag)
            .Input(TTable<TQueryInfo>(tx, resultTableName), QueryInfoInputTag)
            .Input(TTable<TQueryInfo>(tx, resultTableName + "_selected_queries"), TmpQueryInfoInputTag)
            .Input(TTable<TQueryInfo>(tx, resultTableName + "_selected_queries"), Tmp2QueryInfoInputTag)
            .Output(TTable<TQueryInfo>(tx, resultTableName), QueryInfoOutputTag)
            .ReduceBy({"Query"})
            .Do();

        TMapCmd<TFilterNonWebmasterHostsMapper>(tx, new TFilterNonWebmasterHostsMapper(webmasterHosts))
            .Input(TTable<TQueryInfo>(tx, resultTableName), QueryInfoInputTag)
            .Output(TTable<TQueryInfo>(tx, resultTableName), QueryInfoOutputTag)
            .MemoryLimit(2_GBs)
            .Do();

        TSortCmd<TQueryInfo>(tx)
            .Input<TQueryInfo>(resultTableName)
            .Output<TQueryInfo>(resultTableName)
            .By({"Domain", "RegionId", "IsMobile", "Query"})
            .Do();

        LOG_INFO("finish joining group ids and shows");
    }, { sortedWordstat, sortedGroups, filteredPorn, prepareData, readWebmasterHosts });

    auto getTotalPrepared = runner.AddJob([&] {
        TMapReduceCmd<void, TCalculateAverageWithoutRegionIdReducer>(tx)
                .Input(TTable<TQueryInfo>(tx, DebugPath(resultTableName)), QueryInfoInputTag)
                .Output(TTable<TQueryPart>(tx, resultTableName + "_total_prepared"), QueryPartOutputTag)
                .IntermediateMapTag(QueryPartOutputTag)
                .IntermediateReduceTag(QueryInfoInputTag)
                .ReduceBy({"Domain", "GroupId", "Url", "Query"})
                .Do();
    }, { joinGroupsAndShows });

    auto getRivals = runner.AddJob([&] {
        LOG_INFO("start getting rivals for visibilities");

        TMapReduceCmd<void, TCalculateVisibilityPositionShowsReducer>(tx)
            .Input(TTable<TQueryInfo>(tx, resultTableName), QueryInfoInputTag)
            .Output(TTable<TQueryVisibility>(tx, resultTableName + "_total_w_dp"), QueryVisibilityOutputTag)
            .IntermediateMapTag(QueryInfoOutputTag)
            .IntermediateReduceTag(QueryInfoInputTag)
            .ReduceBy({"Domain", "RegionId", "GroupId", "IsMobile", "Query", "Position"})
            .Do();

        TMapReduceCmd<void, TCalculateQueryVisibilityReducer>(tx)
            .Input(TTable<TQueryVisibility>(tx, resultTableName + "_total_w_dp"), QueryVisibilityInputTag)
            .Output(TTable<TQueryVisibility>(tx, resultTableName + "_total_w_dp2"), QueryVisibilityOutputTag)
            .IntermediateMapTag(QueryVisibilityOutputTag)
            .IntermediateReduceTag(QueryVisibilityInputTag)
            .ReduceBy({"Domain", "RegionId", "GroupId", "IsMobile", "Query"})
            .Do();


        TMapReduceCmd<void, TCalculateDomainVisibilityReducer>(tx)
            .Input(TTable<TQueryVisibility>(tx, resultTableName + "_total_w_dp2"), QueryVisibilityInputTag)
            .Output(TTable<TRivalByVisibilityPart>(tx, resultTableName + "_total_w_dp3"), RivalByVisibilityPartOutputTag)
            .IntermediateMapTag(QueryVisibilityOutputTag)
            .IntermediateReduceTag(QueryVisibilityInputTag)
            .ReduceBy({"Domain", "RegionId", "GroupId", "IsMobile"})
            .Do();

        TMapReduceCmd<void, TLeaveOnlyTopRivalsReducer>(tx)
            .Input(TTable<TRivalByVisibilityPart>(tx, resultTableName + "_total_w_dp3"), RivalByVisibilityPartInputTag)
            .Output(TTable<TRivalByVisibilityPart>(tx, resultTableName + "_top_rivals"), RivalByVisibilityPartOutputTag)
            .IntermediateMapTag(RivalByVisibilityPartOutputTag)
            .IntermediateReduceTag(RivalByVisibilityPartInputTag)
            .ReduceBy({"RegionId", "GroupId", "IsMobile"})
            .SortBy({"RegionId", "GroupId", "IsMobile", "RivalPopularity"})
            .Do();

        DoParallel(
            TSortCmd<TRivalByVisibilityPart>(tx)
                .Input<TRivalByVisibilityPart>(resultTableName + "_top_rivals")
                .Output<TRivalByVisibilityPart>(resultTableName + "_top_rivals")
                .By({"RegionId", "GroupId", "IsMobile"}),
            TSortCmd<TRivalByVisibilityPart>(tx)
                .Input<TRivalByVisibilityPart>(resultTableName + "_total_w_dp3")
                .Output<TRivalByVisibilityPart>(resultTableName + "_total_w_dp3")
                .By({"RegionId", "GroupId", "IsMobile"})
        );

        TReduceCmd<TSetRivalsReducer>(tx)
            .Input(TTable<TRivalByVisibilityPart>(tx, resultTableName + "_top_rivals"), TmpRivalByVisibilityPartInputTag)
            .Input(TTable<TRivalByVisibilityPart>(tx, resultTableName + "_total_w_dp3"), RivalByVisibilityPartInputTag)
            .Output(TTable<TRivalByVisibilityPart>(tx, resultTableName + "_total_w_dp3"), RivalByVisibilityPartOutputTag)
            .ReduceBy({"RegionId", "GroupId", "IsMobile"})
            .Do();
        LOG_INFO("finish getting rivals for visibilities");
    }, { joinGroupsAndShows });

    auto prepareRivalsWVis = runner.AddJob([&]{
        TMapCmd<TFilterRivalsDevStuffMapper>(tx)
            .Input(TTable<TRivalByVisibilityPart>(tx, resultTableName + "_total_w_dp3"), RivalByVisibilityPartInputTag)
            .Output(TTable<TRivalByVisibilityPart>(tx, NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT, date)), RivalByVisibilityPartOutputTag)
            .Do();
    }, { getRivals });

    auto prepareOriginalProblems = runner.AddJob([&] {
        if (needToGetQueries) {
            TMapCmd<TGetUrlsForWebmasterSimpleProblemsMapper>(tx)
                .Input(TTable<NJupiter::TAcceptanceUrlForWebMasterRecord>(tx, DebugPath(urlsForWebmasterSimpleTableName).Columns({"Host", "Path", "MainHost", "MainPath", "RelCanonicalTarget", "HttpCode"})), UrlForWebmasterRecordInputTag)
                .Input(TTable<NJupiter::TContentAttrsForWebmaster>(tx, DebugPath(contentAttsTableName).Columns({"Host", "Path", "Title", "MetaDescription"})), ContentAttrsInputTag)
                .Input(TTable<THostContentAttrStatistics>(tx, DebugPath(titleProblemsTableName).Columns({"Host", "Samples"})), TitleHostContentAttrStatisticsInputTag)
                .Input(TTable<THostContentAttrStatistics>(tx, DebugPath(descriptionsProblemsTableName).Columns({"Host", "Samples"})), DescrHostContentAttrStatisticsInputTag)
                .Output(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "prepared_webmaster_simple")), QueryPartOutputTag)
                .Do();
            TSortCmd<TQueryPart>(tx)
                .Input<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "prepared_webmaster_simple"))
                .Output<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "prepared_webmaster_simple"))
                .By({"LandingPage"})
                .Do();
            TReduceCmd<TMakeUniqueReducer>(tx)
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "prepared_webmaster_simple")), QueryPartInputTag)
                .Output(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "prepared_webmaster_simple")), QueryPartOutputTag)
                .ReduceBy({"LandingPage"})
                .Do();
            TSortCmd<TQueryPart>(tx)
                .Input<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "prepared_webmaster_simple"))
                .Output<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "prepared_webmaster_simple"))
                .By({"LandingPage"})
                .Do();
        }
    });

    auto tmp = runner.AddJob([&] {
        LOG_INFO("start adding landing problems");
        if (needToGetQueries) {
            TMapCmd<TFilterAllDevStuffMapper>(tx)
                .Input(TTable<TQueryPart>(tx, resultTableName + "_total_prepared"), QueryPartInputTag)
                .Output(TTable<TQueryPart>(tx, resultTableName + "_total_prepared"), QueryPartOutputTag)
                .Do();

            TMapReduceCmd<void, TFilterQueriesOfUrlReducer>(tx)
                .Input(TTable<TQueryPart>(tx, resultTableName + "_total_prepared"), QueryPartInputTag)
                .Output(TTable<TQueryPart>(tx, resultTableName + "_total_prepared"), QueryPartOutputTag)
                .IntermediateMapTag(QueryPartOutputTag)
                .IntermediateReduceTag(QueryPartInputTag)
                .ReduceBy({"Domain", "GroupId", "LandingPage"})
                .Do();

            TSortCmd<TQueryPart>(tx)
                .Input<TQueryPart>(resultTableName + "_total_prepared")
                .Output<TQueryPart>(resultTableName + "_total_prepared")
                .By({"LandingPage"})
                .Do();

            TReduceCmd<TAddProblemsReducer>(tx)
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "prepared_webmaster_simple")), TmpQueryPartInputTag)
                .Input(TTable<TQueryPart>(tx, resultTableName + "_total_prepared"), QueryPartInputTag)
                .Output(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod2")), QueryPartOutputTag)
                .ReduceBy({"LandingPage"})
                .Do();
        }


        DoParallel(
            TSortCmd<TQueryPart>(tx)
                .Input<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod2"))
                .Output<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod2"))
                .By({"Query", "Domain"}),
            TSortCmd<TNicheHostQuerySampled>(tx)
                .Input<TNicheHostQuerySampled>(newMarketTableName)
                .Output<TNicheHostQuerySampled>(newMarketTableName)
                .By({"TopCorrectedQuery", "Domain"}),
            TSortCmd<TGroupIdToQuery>(tx)
                .Input<TGroupIdToQuery>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "export_groups"))
                .Output<TGroupIdToQuery>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "export_groups"))
                .By({"Query", "Domain"})
        );

        if (needToGetQueries) {
            TReduceCmd<TAddQueriesWithoutLandingsReducer1>(tx)
                .Input(TTable<TQueryPart>(tx, NYT::TRichYPath(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod2")).RenameColumns({{"Query", "TopCorrectedQuery"}})), QueryPartInputTag)
                .Input(TTable<TNicheHostQuerySampled>(tx, NYT::TRichYPath(newMarketTableName)), NicheHostQuerySampledInputTag)
                .Output(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod3")), QueryPartOutputTag)
                .ReduceBy({"TopCorrectedQuery", "Domain"})
                .Do();

            TReduceCmd<TAddQueriesWithoutLandingsReducer2>(tx)
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod2")), QueryPartInputTag)
                .Input(TTable<TGroupIdToQuery>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "export_groups")), GroupIdToQueryInputTag)
                .Output(TTable<TQueryPart>(tx, NYT::TRichYPath(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod3")).Append(true)), QueryPartOutputTag)
                .ReduceBy({"Query", "Domain"})
                .Do();

            TMapCmd<TAddQueriesWithoutLandingsMapper>(tx)
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod2")), QueryPartInputTag)
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod3")), TmpQueryPartInputTag)
                .Output(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod3")), QueryPartOutputTag)
                .Do();
        }



        if (needToGetQueries) {
            TSortCmd<TQueryPart>(tx)
                .Input<TQueryPart>(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod3")))
                .Output<TQueryPart>(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod3")))
                .By({"Query", "RegionId"})
                .Do();

            TReduceCmd<TSetSimilarPagesReducer>(tx)
                .Input(TTable<TQueryToUrl>(tx, resultTableName + "_similar_pages"), QueryToUrlInputTag)
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod3")), QueryPartInputTag)
                .Output(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod7")), QueryPartOutputTag)
                .ReduceBy({"Query", "RegionId"})
                .Do();

            TMapCmd<TDivideBasedOnLandingPageMapper>(tx)
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod7")), QueryPartInputTag)
                .Output(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod8")), QueryPartOutputTag)
                .Output(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod9")), TmpQueryPartOutputTag)
                .Do();

            DoParallel(
                TSortCmd<TQueryPart>(tx)
                    .Input<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod8"))
                    .Output<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod8"))
                    .By({"GroupId", "Domain", "Query", "LandingPage"}),
                TSortCmd<TQueryPart>(tx)
                    .Input<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod9"))
                    .Output<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod9"))
                    .By({"GroupId", "Domain", "Query"})
            );

            TReduceCmd<TRemoveRivalsReducer>(tx)
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod8")), QueryPartInputTag)
                .Output(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod88")), QueryPartOutputTag)
                .ReduceBy({"GroupId", "Domain", "Query", "LandingPage"})
                .Do();

            TReduceCmd<TRemoveRivalsReducer>(tx)
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod9")), QueryPartInputTag)
                .Output(TTable<TQueryPart>(tx, NYT::TRichYPath(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod88")).Append(true)), QueryPartOutputTag)
                .ReduceBy({"GroupId", "Domain", "Query"})
                .Do();
        }


        LOG_INFO("finish adding landing problems");
    }, { prepareOriginalProblems, prepareRivalsWVis, prepareSimilarPages, getTotalPrepared });

    runner.AddJob([&]() {
        TMapCmd<TFilterNonWebmasterHostsRivalsMapper>(tx, new TFilterNonWebmasterHostsRivalsMapper(webmasterHosts))
            .Input(TTable<TRivalByVisibilityPart>(tx, NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT, date)), RivalByVisibilityPartInputTag)
            .Output(TTable<TRivalByVisibilityPart>(tx, NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT, date)), RivalByVisibilityPartOutputTag)
            .MemoryLimit(2_GBs)
            .Do();

        if (needToGetQueries) {
            TMapCmd<TFilterNonWebmasterHostsQueriesMapper>(tx, new TFilterNonWebmasterHostsQueriesMapper(webmasterHosts))
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod88")), QueryPartInputTag)
                .Output(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod")), QueryPartOutputTag)
                .MemoryLimit(2_GBs)
                .Do();

            TSortCmd<TQueryPart>(tx)
                .Input<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod"))
                .Output<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod"))
                .By({"Query"})
                .Do();

            TReduceCmd<TAddShowsReducer>(tx)
                .Input(TTable<TWordstat>(tx, resultTableName + "_ws"), WordstatInputTag)
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod")), QueryPartInputTag)
                .Output(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod")), QueryPartOutputTag)
                .ReduceBy({"Query"})
                .Do();

            TSortCmd<TQueryPart>(tx)
                .Input<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod"))
                .Output<TQueryPart>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod"))
                .By({"Query", "Domain"})
                .Do();

            TReduceCmd<TAddWeightForNewMarketReducer>(tx)
                .Input(TTable<TNicheHostQuerySampled>(tx, NYT::TRichYPath(newMarketTableName).RenameColumns({{"TopCorrectedQuery", "Query"}})), NicheHostQuerySampledInputTag)
                .Input(TTable<TQueryPart>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod")), QueryPartInputTag)
                .Output(TTable<TQueryPartWWeight>(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report")), QueryPartWWeightOutputTag)
                .ReduceBy({"Query", "Domain"})
                .Do();

            TSortCmd<TQueryPartWWeight>(tx)
                .Input<TQueryPartWWeight>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report"))
                .Output<TQueryPartWWeight>(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report"))
                .By({"Domain", "GroupId", "Query"})
                .Do();

            NYTUtils::SetAttr(tx, NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report"), "upd_date", date);
        }

        tx->Copy(TTable<TRivalByVisibilityPart>(tx, NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT, date)), TTable<TRivalByVisibilityPart>(tx, NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT + "_host2host", date)));

        DoParallel(
            TMapReduceCmd<TAddTypesToRivalsMapper, TSumUpVisibilitiesReducer>(tx, new TAddTypesToRivalsMapper(radarTypes))
                .Input(TTable<TRivalByVisibilityPart>(tx, NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT, date)), RivalByVisibilityPartInputTag)
                .Output(TTable<TRivalReportRow>(tx, NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT, date)), RivalReportRowOutputTag)
                .IntermediateMapTag(RivalByVisibilityPartOutputTag)
                .IntermediateReduceTag(RivalByVisibilityPartInputTag)
                .ReduceBy({"Domain", "GroupId", "IsMobile", "RivalType", "RegionId"}),
            TSortCmd<TRivalByVisibilityPart>(tx)
                .Input<TRivalByVisibilityPart>(NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT + "_host2host", date))
                .Output<TRivalByVisibilityPart>(NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT + "_host2host", date))
                .By({"Domain", "RegionId", "GroupId", "IsMobile"})
        );

        TSortCmd<TRivalReportRow>(tx)
            .Input<TRivalReportRow>(NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT, date))
            .Output<TRivalReportRow>(NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT, date))
            .By({"Domain", "GroupId", "RegionId", "IsMobile"})
            .Do();

        auto removeOptions = NYT::TRemoveOptions().Force(true);
        tx->Remove(resultTableName, removeOptions);
        tx->Remove(resultTableName + "_total_prepared", removeOptions);
        tx->Remove(resultTableName + "_ws", removeOptions);
        tx->Remove(resultTableName + "_similar_pages", removeOptions);
        tx->Remove(resultTableName + "_uids", removeOptions);
        tx->Remove(resultTableName + "_uids_counts", removeOptions);
        tx->Remove(resultTableName + "_sampled", removeOptions);
        tx->Remove(resultTableName + "_total_w_dp2", removeOptions);
        tx->Remove(resultTableName + "_total_w_dp3", removeOptions);
        tx->Remove(resultTableName + "_with_counts", removeOptions);
        tx->Remove(resultTableName + "_top_rivals", removeOptions);
        tx->Remove(resultTableName + "_gs", removeOptions);
        tx->Remove(resultTableName + "_selected_queries", removeOptions);
        tx->Remove(pornoQueriesTable, removeOptions);
        tx->Remove(resultTableName + "_total_w_dp", removeOptions);
        tx->Remove(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod7"), removeOptions);
        tx->Remove(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod88"), removeOptions);
        tx->Remove(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "queries_report_prod"), removeOptions);
        tx->Remove(NYTUtils::JoinPath(cfg.TABLE_FIRST_REPORT_ROOT, "prepared_webmaster_simple"), removeOptions);
    }, { readWebmasterHosts, prepareRivalsWVis, tmp, prepareRegionWeight });

    runner.Run();
}

// Builds the "<rivals visibility root>_host2host_weighted/<date>" table from the
// "<rivals visibility root>_host2host/<date>" table.
//
// Steps, in this order:
//   1. the host2host table is re-sorted in place by Domain, RegionId, GroupId, IsMobile;
//   2. NYTUtils::CreatePath is called for the "_host2host_weighted" root;
//   3. TAddRegionWeightReducer is run over the
//      "<first report root>/domain2region2shows_weighted" table and the sorted
//      host2host table, reducing by (Domain, RegionId), and its output is written
//      to the weighted table.
//
// tx   - transaction every operation is executed in.
// date - name of the daily table to process.
void AddRegionWeightToRivals(NYT::ITransactionPtr &tx,
                                TString date) {
    const auto& config = TConfig::CInstance();

    // Both roots are derived from the same configured prefix.
    const TString host2hostRoot = config.TABLE_RIVALS_VISIBILITY_REPORT_ROOT + "_host2host";
    const TString weightedRoot = config.TABLE_RIVALS_VISIBILITY_REPORT_ROOT + "_host2host_weighted";

    TString rivalsTable = NYTUtils::JoinPath(host2hostRoot, date);
    TString weightedRivalsTable = NYTUtils::JoinPath(weightedRoot, date);
    TString regionWeightsTable = NYTUtils::JoinPath(config.TABLE_FIRST_REPORT_ROOT, "domain2region2shows_weighted");

    // The sort key starts with the reduce key (Domain, RegionId) used below.
    TSortCmd<TRivalByVisibilityPart>(tx)
        .Input<TRivalByVisibilityPart>(rivalsTable)
        .Output<TRivalByVisibilityPart>(rivalsTable)
        .By({"Domain", "RegionId", "GroupId", "IsMobile"})
        .Do();

    NYTUtils::CreatePath(tx, weightedRoot);

    // The weights table is attached before the rivals table, as in the input order below.
    TReduceCmd<TAddRegionWeightReducer>(tx)
        .Input(TTable<TDomainRegionShows>(tx, regionWeightsTable), DomainRegionShowsInputTag)
        .Input(TTable<TRivalByVisibilityPart>(tx, rivalsTable), RivalByVisibilityPartInputTag)
        .Output(TTable<TRivalByVisibilityPart>(tx, weightedRivalsTable), RivalByVisibilityPartOutputTag)
        .ReduceBy({"Domain", "RegionId"})
        .Do();
}

// Runs TAddTypesToRivalsMapper over the
// "<rivals visibility root>_host2host_weighted/<date>" table, writing the result
// back to the same path.
//
// tx         - transaction the map operation is executed in.
// date       - name of the daily table to process.
// radarTypes - map handed to the mapper's constructor
//              (the caller in this file passes a domain -> type map).
void AddRivalTypeToRivals(NYT::ITransactionPtr &tx,
                            TString date,
                            const THashMap<TString, TString> &radarTypes) {
    const auto& config = TConfig::CInstance();

    const TString weightedRoot = config.TABLE_RIVALS_VISIBILITY_REPORT_ROOT + "_host2host_weighted";
    TString weightedRivalsTable = NYTUtils::JoinPath(weightedRoot, date);

    // Input and output are the same table: the mapper output replaces it.
    TMapCmd<TAddTypesToRivalsMapper>(tx, new TAddTypesToRivalsMapper(radarTypes))
        .Input(TTable<TRivalByVisibilityPart>(tx, weightedRivalsTable), RivalByVisibilityPartInputTag)
        .Output(TTable<TRivalByVisibilityPart>(tx, weightedRivalsTable), RivalByVisibilityPartOutputTag)
        .Do();
}

// Turns the "<rivals visibility root>_host2host_weighted/<date>" table into the
// final TRivalVisibility table, in place.
//
// Steps, in this order:
//   1. a map-reduce with no mapper and TGetRidOfIsMobileReducer, reducing by
//      (Domain, RegionId, GroupId, Rival, RivalType); IsMobile is not part of the
//      key, so rows differing only in IsMobile reach the reducer together;
//   2. the resulting table is sorted by Domain, GroupId, RegionId, Rival.
//
// tx   - transaction both operations are executed in.
// date - name of the daily table to process.
void CreateVisibilityReport(NYT::ITransactionPtr &tx,
                            TString date) {
    const auto& config = TConfig::CInstance();

    const TString weightedRoot = config.TABLE_RIVALS_VISIBILITY_REPORT_ROOT + "_host2host_weighted";
    TString visibilityTable = NYTUtils::JoinPath(weightedRoot, date);

    // The row type of the table changes here: TRivalByVisibilityPart in, TRivalVisibility out.
    TMapReduceCmd<void, TGetRidOfIsMobileReducer>(tx)
        .Input(TTable<TRivalByVisibilityPart>(tx, visibilityTable), RivalByVisibilityPartInputTag)
        .Output(TTable<TRivalVisibility>(tx, visibilityTable), RivalVisibilityOutputTag)
        .IntermediateMapTag(RivalByVisibilityPartOutputTag)
        .IntermediateReduceTag(RivalByVisibilityPartInputTag)
        .ReduceBy({"Domain", "RegionId", "GroupId", "Rival", "RivalType"})
        .Do();

    TSortCmd<TRivalVisibility>(tx)
        .Input<TRivalVisibility>(visibilityTable)
        .Output<TRivalVisibility>(visibilityTable)
        .By({"Domain", "GroupId", "RegionId", "Rival"})
        .Do();
}

int TaskPrepareReport(int, const char **) {
    static const TString DATE_LOWER_BOUND = "2022-07-01";
    const auto& cfg = TConfig::CInstance();

    auto arnoldClient = NYT::CreateClient(cfg.MR_SERVER_HOST);
    auto radarClient = NYT::CreateClient(cfg.MR_RADAR_HOST);

    TDeque<NYTUtils::TTableInfo> userSessionsTables;
    LOG_INFO("Looking at %s", cfg.TABLE_USER_SESSIONS_DAILY.c_str());
    NYTUtils::GetTableList(arnoldClient, cfg.TABLE_USER_SESSIONS_DAILY, userSessionsTables);
    std::sort(userSessionsTables.begin(), userSessionsTables.end(), NYTUtils::TTableInfo::TNameLess());


    TDeque<NYTUtils::TTableInfo> radarTables;
    NYTUtils::GetTableList(radarClient, cfg.TABLE_RADAR_CLUSTERS_SOURCE, radarTables);
    std::sort(radarTables.begin(), radarTables.end(), NYTUtils::TTableInfo::TNameGreater());

    TDeque<TString> dates;
    TString lastRadarTable = radarTables.front().Name;
    bool needToGetQueries = false;
    for (auto table : userSessionsTables) {
        auto date = NYTUtils::GetTableName(table.Name);
        if (date < DATE_LOWER_BOUND) {
            continue;
        }
        LOG_INFO("%s is possible", date.c_str());
        auto newMarkets = NYTUtils::JoinPath(cfg.TABLE_NICHE_CALCULATED, date);
        if (arnoldClient->Exists(newMarkets)
            && arnoldClient->Exists(NYTUtils::JoinPath(cfg.TABLE_PRS_LOG_DAILY, date))) {
            LOG_INFO("Want to prepare report for date %s", date.c_str());
            if (!arnoldClient->Exists(NYTUtils::JoinPath(cfg.TABLE_RIVALS_VISIBILITY_REPORT_ROOT, date))) {
                dates.push_back(date);
                break;
            }
        }
    }

    if (!dates.empty() && dates.back() == NYTUtils::GetTableName(userSessionsTables.back().Name)) {
        needToGetQueries = true;
    }

    // dates = {
    //             "2022-07-21",
    //             "2022-07-22",
    //             "2022-07-23"
    // };

    auto reader = radarClient->CreateTableReader<NYT::TNode>(lastRadarTable);
    THashMap<TString, TString> domainToType;
    for (; reader->IsValid(); reader->Next()) {
        auto &row = reader->GetRow();
        TString domain = TString(HostUrlToDomain(row["domain"].AsString()));
        TString type = row["type"].AsString();
        TString punycodeDomain;
        NUtils::IDNHostToAscii(domain, punycodeDomain);
        if (punycodeDomain != "") {
            domainToType[punycodeDomain] = type;
        }
    }
    LOG_INFO("Domain2Type size: %lu", domainToType.size());

    for (auto &date: dates) {
        LOG_INFO("Found radar table: %s", lastRadarTable.c_str());
        LOG_INFO("Preparing report for date %s with [need to get queries] = %d", date.c_str(), needToGetQueries);
        auto tx = arnoldClient->StartTransaction();

        PrepareRivalsBasedOnVisibility(tx, date, domainToType, needToGetQueries);
        AddRegionWeightToRivals(tx, date);
        AddRivalTypeToRivals(tx, date, domainToType);
        CreateVisibilityReport(tx, date);
        tx->Commit();
    }

    return 0;
}

} //namespace NNiche2
} //namespace NWebmaster