#include <util/generic/hash_set.h>
#include <util/generic/size_literals.h>

#include <robot/library/yt/static/command.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/wmcutil/thread.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>
#include <wmconsole/version3/processors/tools/host2vec/applier/protos/tables.pb.h>
#include <wmconsole/version3/processors/tools/host2vec/utils/utils.h>
#include <wmconsole/version3/processors/user_sessions/library/utils.h>
#include <wmconsole/version3/processors/user_sessions/niche/conf/config.h>
#include <wmconsole/version3/processors/user_sessions/niche/miner/tables.pb.h>
#include <wmconsole/version3/processors/user_sessions/protos/user_sessions.pb.h>

#include <wmconsole/version3/wmcutil/yt/misc.h>

#include "task_import_queries_and_rivals.h"

namespace NWebmaster {
namespace NNiche {
using namespace NJupiter;

// Click share per search result position, keyed by the integer position (keys 0..50).
// TUserSessionQueryCountReduce looks values up with at(row.GetPosition()), so a position
// that has no entry here is not given a default value by this table.
// NOTE(review): the value for key 50 is larger than for keys 12..49 — presumably it is an
// aggregate bucket for positions 50 and beyond; confirm against the data source.
static const THashMap<int, double> POSITION_CLICK_SHARE = {
    {0, 0.44145802},
    {1, 0.17935543},
    {2, 0.0991559},
    {3, 0.06437186},
    {4, 0.04511521},
    {5, 0.033906512},
    {6, 0.026201464},
    {7, 0.02084791},
    {8, 0.017130397},
    {9, 0.0148711195},
    {10, 0.012429338},
    {11, 0.00802707},
    {12, 0.005058365},
    {13, 0.0037565376},
    {14, 0.00320557},
    {15, 0.002985383},
    {16, 0.0021870318},
    {17, 0.0015464955},
    {18, 0.0012920866},
    {19, 0.0012179656},
    {20, 0.0008981123},
    {21, 0.0008094015},
    {22, 0.0007817494},
    {23, 0.0007629992},
    {24, 0.00074897404},
    {25, 0.0006287237},
    {26, 0.00057357876},
    {27, 0.00054003723},
    {28, 0.0005263504},
    {29, 0.0005377767},
    {30, 0.00039226116},
    {31, 0.0003618701},
    {32, 0.00035197384},
    {33, 0.00034606113},
    {34, 0.00034189475},
    {35, 0.00029397398},
    {36, 0.00027331087},
    {37, 0.00025987063},
    {38, 0.0002549697},
    {39, 0.00026186308},
    {40, 0.00021976128},
    {41, 0.00020842045},
    {42, 0.00020256363},
    {43, 0.00019855474},
    {44, 0.00019574503},
    {45, 0.00018173702},
    {46, 0.0001716505},
    {47, 0.00016451556},
    {48, 0.00016322076},
    {49, 0.00016969365},
    {50, 0.004058719},
};

// Typed input/output tags used by the tagged reducers below to read rows from and
// write rows to the tables bound to the same tag in the corresponding TReduceCmd.
// NOTE(review): the numeric id 11 is used both by HostQueryPopularityInputTag and by
// PopularityOutputTag. This is only safe if input and output tags live in separate id
// spaces — confirm against the TTaggedReducer implementation.
static const TInputTag<NUserSessions::NProto::TQuery> UserSessionInputTag(1);
static const TOutputTag<NProto::TUserSessionQueryCountStat> UserSessionQueryCountOutputTag(2);
static const TInputTag<NProto::THostQuery> TQueriesSourceInputTag(3);
static const TInputTag<NProto::TUserSessionQueryCountStat> UserSessionQueryCountInputTag(4);
static const TInputTag<NProto::TUserSessionIntersecQuery> UserSessionCrossQueryInputTag(5);
static const TOutputTag<NProto::TUserSessionIntersecQuery> UserSessionCrossQueryOutputTag(6);
static const TOutputTag<NProto::TSelectedRival> SelectedRivalOutputTag(7);
static const TInputTag<NProto::TSelectedRival> SelectedRivalInputTag(8);
static const TOutputTag<NProto::TRivalHostQueryCount> RivalQueryCountOutputTag(9);
static const TInputTag<NProto::TRivalHostQueryCount> RivalQueryCountInputTag(10);
static const TInputTag<NProto::TUserSessionIntersecQuery> HostQueryPopularityInputTag(11);

// PopularityOutputTag is not referenced by any reducer visible in this file.
static const TOutputTag<NProto::TQueryPopularity> PopularityOutputTag(11);
static const TInputTag<NProto::TQueryPopularity> PopularityInputTag(12);
static const TOutputTag<NProto::TNicheRivalReport> NicheRivalReportOutputTag(13);

// Wraps a table name into a rich YT path. In normal operation the path is returned
// untouched; the commented-out line below shows how to restrict the read to a single
// host key while debugging.
static NYT::TRichYPath DebugPath(const TString& table) {
    NYT::TRichYPath richPath(table);
    // Debug hook (disabled), e.g.:
    //   richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://webmaster.yandex.ru"))));
    return richPath;
}

struct TUserSessionQueryCountReduce: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter write) override {
        NProto::TUserSessionQueryCountStat result;
        ui64 count = 0;
        double position = 0;
        double clickPositionShare = 0;
        NUserSessions::NProto::TQuery last;
        for (auto row : reader.GetRows(UserSessionInputTag)) {
            if (!NUserSessions::IsVisibleQueryInWebmaster(row) || row.GetUpperQueryNavPred() >= 0.5 || row.GetIsNav() == true) {
                continue;
            }
            last = row;
            position += row.GetPosition();
            clickPositionShare += POSITION_CLICK_SHARE.at(row.GetPosition());
            count++;
        }
        if (count > 0) {
            result.SetHost(last.GetHost());
            result.SetQuery(last.GetCorrectedQuery());
            result.SetRegionId(last.GetRegionId());
            result.SetIsMobile(last.GetIsMobile());
            result.SetIsPad(last.GetIsPad());
            result.SetCount(count);
            result.SetPosition(position / count);
            result.SetClickPositionShare(clickPositionShare / count);
            write.AddRow(result, UserSessionQueryCountOutputTag);
        }
    }
};

REGISTER_REDUCER(TUserSessionQueryCountReduce)

struct TReduceQueriesReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter write) override {
        NProto::TUserSessionQueryCountStat result;
        ui64 countValue = 0;
        double position = 0;
        double clickPositionShare = 0;
        bool first = true;
        for (auto row : reader.GetRows(UserSessionQueryCountInputTag)) {
            if (first) {
                result.SetHost(row.GetHost());
                result.SetQuery(row.GetQuery());
                result.SetRegionId(row.GetRegionId());
                result.SetIsMobile(row.GetIsMobile());
                result.SetIsPad(row.GetIsPad());
                first = false;
            }
            position += row.GetPosition() * row.GetCount();
            clickPositionShare += row.GetClickPositionShare() * row.GetCount();
            countValue += row.GetCount();
        }
        result.SetCount(countValue);
        result.SetPosition(position / countValue);
        result.SetClickPositionShare(clickPositionShare / countValue);
        write.AddRow(result, UserSessionQueryCountOutputTag);
    }
};

REGISTER_REDUCER(TReduceQueriesReducer)

struct TJoinQueriesPopularityReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter write) override {
        TMaybe<NProto::TQueryPopularity> popularity = reader.GetSingleRowMaybe(PopularityInputTag);
        if (!popularity.Defined()) {
            return;
        }
        for (auto row : reader.GetRows(UserSessionQueryCountInputTag)) {
            row.SetPopularity(popularity->GetPopularity());
            write.AddRow(row, UserSessionQueryCountOutputTag);
        }
    }
};

REGISTER_REDUCER(TJoinQueriesPopularityReducer)

// Cross join inside one reduce group: every user-session statistics row is paired with
// every niche host-query row of the same group. The niche rows are buffered in memory
// first, then for each session row one output row per buffered host is written, with
// MainHost / QuerySourceFlag / Share taken from the niche row and everything else from
// the session row. At most MAX_SESSION_ROWS session rows are expanded per group; the
// remaining ones are skipped.
struct TQueriesCrossJoinReduce: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter write) override {
        // Upper bound on the number of session rows expanded per reduce group.
        constexpr int MAX_SESSION_ROWS = 40000;

        TDeque<NProto::THostQuery> hosts;
        for (auto queryRow : reader.GetRows(TQueriesSourceInputTag)) {
            hosts.push_back(queryRow);
        }
        NProto::TUserSessionIntersecQuery result;
        int count = MAX_SESSION_ROWS;
        for (auto row : reader.GetRows(UserSessionQueryCountInputTag)) {
            count--;
            result.SetHost(row.GetHost());
            result.SetQuery(row.GetQuery());
            result.SetRegionId(row.GetRegionId());
            result.SetIsMobile(row.GetIsMobile());
            result.SetIsPad(row.GetIsPad());
            result.SetPosition(row.GetPosition());
            result.SetPopularity(row.GetPopularity());
            result.SetClickPositionShare(row.GetClickPositionShare());
            result.SetCount(row.GetCount());
            // Iterate by const reference: this loop runs once per (session row, host)
            // pair, so copying the message on every iteration is pure overhead.
            for (const auto& mainHost : hosts) {
                result.SetMainHost(mainHost.GetHost());
                result.SetQuerySourceFlag(mainHost.GetSourceFlag());
                result.SetShare(mainHost.GetShare());
                write.AddRow(result, UserSessionCrossQueryOutputTag);
            }
            if (count <= 0) {
                break;
            }
        }
        // Drain whatever is left after the limit was hit.
        reader.SkipRows(UserSessionQueryCountInputTag);
    }
};

REGISTER_REDUCER(TQueriesCrossJoinReduce)

// Sums the query counts of one reduce group of cross-joined session rows into a single
// TRivalHostQueryCount row. When LocalWordstat is set every count is weighted by the
// row's Share. The total is stored negated (see SetCount below).
struct TRivalQueriesCountReduce: public TTaggedReducer {
public:
    TRivalQueriesCountReduce() = default;
    TRivalQueriesCountReduce(bool localWordstat) : LocalWordstat(localWordstat) {
    }

    // LocalWordstat is serialized before the base-class state; Load reads it back in
    // the same order.
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, LocalWordstat);
        TTaggedReducer::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, LocalWordstat);
        TTaggedReducer::Load(stream);
    }

    void DoTagged(TTagedReader reader, TTagedWriter write) override {
        NProto::TRivalHostQueryCount result;
        double count = 0;
        for (auto row : reader.GetRows(UserSessionCrossQueryInputTag)) {
            // The accumulator is exactly 0 until a row contributes a non-zero amount,
            // so the identity fields are set at least on the first row.
            if (count == 0) {
                result.SetMainHost(row.GetMainHost());
                result.SetHost(row.GetHost());
            }
            count += row.GetCount() * (LocalWordstat ? row.GetShare() : 1.0);
        }
        // Negated total: presumably so that a default (ascending) sort by Count lists
        // the hosts with the largest totals first — confirm against the sort that follows.
        result.SetCount(-count);
        write.AddRow(result, RivalQueryCountOutputTag);
    }
private:
    // Initialized in-class so that a default-constructed reducer never holds an
    // indeterminate value (e.g. if Save is called before Load).
    bool LocalWordstat = false;
};
REGISTER_REDUCER(TRivalQueriesCountReduce)

struct TRivalQueriesLimitReduce: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter write) override {
        int count = 20;
        bool findMainHost = false;
        TString mainHost;
        for (auto row : reader.GetRows(RivalQueryCountInputTag)) {
            mainHost = row.GetMainHost();
            //Для дальнейшего надо что бы главный хост был своим же дубликатом, так проще
            if (row.GetMainHost() == row.GetHost()) {
                findMainHost = true;
                count++;
            }
            write.AddRow(row, RivalQueryCountOutputTag);
            count--;
            if (count == 0) {
                break;
            }
        }
        if (!findMainHost) {
            NProto::TRivalHostQueryCount result;
            result.SetMainHost(mainHost);
            result.SetHost(mainHost);
            result.SetCount(999999);
            write.AddRow(result, RivalQueryCountOutputTag);
        }
        reader.SkipRows(RivalQueryCountInputTag);
    }
};

REGISTER_REDUCER(TRivalQueriesLimitReduce)

struct TRivalSessionQueryJoin: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter write) override {
        TMaybe<NProto::TRivalHostQueryCount> rivalQuery = reader.GetSingleRowMaybe(RivalQueryCountInputTag);
        if (!reader.IsValid() || rivalQuery.Empty()) {
            return;
        }
        NProto::TSelectedRival rival;
        for (auto row : reader.GetRows(UserSessionCrossQueryInputTag)) {
            rival.SetHost(row.GetMainHost());
            rival.SetRival(row.GetHost());
            write.AddRow(row, UserSessionCrossQueryOutputTag);
        }
        write.AddRow(rival, SelectedRivalOutputTag);
    }
};

REGISTER_REDUCER(TRivalSessionQueryJoin)

// De-duplicates cross-joined session rows: only the last row of each reduce group is
// written to the output.
struct TUserSessionQueryRemoveDuplication: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        auto lastRow = reader.GetLastRowMaybe(UserSessionCrossQueryInputTag);
        writer.AddRow(*lastRow, UserSessionCrossQueryOutputTag);
    }
};

REGISTER_REDUCER(TUserSessionQueryRemoveDuplication)

// De-duplicates selected-rival rows: only the last row of each reduce group is written
// to the output.
struct TReduceSelectedRivalReducer: public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        auto lastRow = reader.GetLastRowMaybe(SelectedRivalInputTag);
        writer.AddRow(*lastRow, SelectedRivalOutputTag);
    }
};

REGISTER_REDUCER(TReduceSelectedRivalReducer)

// Builds the niche rival report for one reduce group.
//
// Input: one "total popularity" row (HostQueryPopularityInputTag) followed by the
// cross-joined session rows of the group, which are expected to arrive grouped by Host
// (a new report row is started whenever Host changes).
// Output: one TNicheRivalReport row per rival with
//   Popularity = sum(Count * ClickPositionShare [* Share when LocalWordstat]),
//   Visibility = Popularity / total popularity,
// plus one extra row with an empty Rival that carries the total popularity itself.
// Groups without a total-popularity row produce no output.
// NOTE(review): Visibility is not guarded against a zero total popularity.
struct TPrepareUserSessionReportReducer: public TTaggedReducer {
public:
    TPrepareUserSessionReportReducer() = default;
    TPrepareUserSessionReportReducer(bool localWordstat) : LocalWordstat(localWordstat) {
    }

    // LocalWordstat is serialized before the base-class state; Load reads it back in
    // the same order.
    void Save(IOutputStream& stream) const override {
        ::Save(&stream, LocalWordstat);
        TTaggedReducer::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, LocalWordstat);
        TTaggedReducer::Load(stream);
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        NProto::TNicheRivalReport result;
        double popularity = 0.0;
        TMaybe<NProto::TUserSessionIntersecQuery> mbQueryPopularity = reader.GetSingleRowMaybe(HostQueryPopularityInputTag);
        if (mbQueryPopularity.Empty()) {
            return;
        }
        double totalPopularity = mbQueryPopularity->GetCount() * (LocalWordstat ? mbQueryPopularity->GetShare() : 1.0);
        for (auto row : reader.GetRows(UserSessionCrossQueryInputTag)) {
            if (row.GetHost() != result.GetRival()) {
                // Host changed: flush the rival accumulated so far (if any) and start a new one.
                if (!result.GetRival().empty()) {
                    result.SetVisibility(popularity / totalPopularity);
                    result.SetPopularity(popularity);
                    writer.AddRow(result, NicheRivalReportOutputTag);
                }
                popularity = 0.0;
                result.SetHost(row.GetMainHost());
                result.SetRival(row.GetHost());
                result.SetRegionId(row.GetRegionId());
                result.SetIsMobile(row.GetIsMobile());
                result.SetIsPad(row.GetIsPad());
                result.SetQuerySourceFlag(row.GetQuerySourceFlag());
            }
            popularity += row.GetCount() * row.GetClickPositionShare() * (LocalWordstat ? row.GetShare() : 1.0);
        }
        if (!result.GetRival().empty()) {
            // Flush the last rival of the group.
            result.SetPopularity(popularity);
            result.SetVisibility(popularity / totalPopularity);
            writer.AddRow(result, NicheRivalReportOutputTag);
            // Synthetic row with an empty rival that carries the group total.
            result.SetRival("");
            result.SetPopularity(totalPopularity);
            result.SetVisibility(0.0); // marked with zero visibility so it can be removed later
            writer.AddRow(result, NicheRivalReportOutputTag);
        }
    }
private:
    // Initialized in-class so that a default-constructed reducer never holds an
    // indeterminate value (e.g. if Save is called before Load).
    bool LocalWordstat = false;
};
REGISTER_REDUCER(TPrepareUserSessionReportReducer)

// Aggregates the imported host queries of one reduce group into a single
// TUserSessionIntersecQuery row: Count is the sum of the input counts, Share is the sum
// of the input shares, and the identity fields (MainHost, RegionId, IsMobile, IsPad,
// QuerySourceFlag) are copied from the input while the running count is still zero.
struct TCalcTotalHostQueryPopularity : public NYT::IReducer<NYT::TTableReader<NProto::THostQueryImport>,
                                                NYT::TTableWriter<NProto::TUserSessionIntersecQuery>> {
public:
    void Do(TReader* input, TWriter* output) override {
        NProto::TUserSessionIntersecQuery total;
        ui64 countSum = 0;
        double shareSum = 0.0;

        while (input->IsValid()) {
            const NProto::THostQueryImport& importRow = input->GetRow();
            if (countSum == 0) {
                total.SetMainHost(importRow.GetHost());
                total.SetRegionId(importRow.GetRegionId());
                total.SetIsMobile(importRow.GetIsMobile());
                total.SetIsPad(importRow.GetIsPad());
                total.SetQuerySourceFlag(importRow.GetSourceFlag());
            }
            countSum += importRow.GetCount();
            shareSum += importRow.GetShare();
            input->Next();
        }

        total.SetCount(countSum);
        total.SetShare(shareSum);
        output->AddRow(total);
    }
};
REGISTER_REDUCER(TCalcTotalHostQueryPopularity)

// Runs the rival-selection and niche-report part of the pipeline inside transaction tx.
//
// tx              - YT client/transaction every operation is executed in.
// userSessionDate - name of the user-session table being processed; used as the last
//                   path component of the three result tables.
// localWordstat   - when true, counts are weighted by Share in the reducers and all
//                   temporary/result tables get the "_lws" suffix.
//
// Reads TABLE_TMP_CROSS_SESSION_QUERY_INFO + "0" and TABLE_TMP_TOTAL_POPULARITY + "1",
// which are produced by ImportQueriesAndRivals before this function is called.
void PrepareNicheReport(NYT::IClientBasePtr tx, const TString &userSessionDate, bool localWordstat) {
    const auto& config = TConfig::CInstance();
    TString tableSuffix = localWordstat ? "_lws" : "";

    // Total query count per (MainHost, Host) pair.
    TReduceCmd<TRivalQueriesCountReduce>(tx, new TRivalQueriesCountReduce(localWordstat))
        .Input(TTable<NProto::TUserSessionIntersecQuery>(tx, DebugPath(config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "0")), UserSessionCrossQueryInputTag)
        .Output(TTable<NProto::TRivalHostQueryCount>(tx, config.TABLE_TMP_RIVAL_QUERY_COUNT + "0" + tableSuffix), RivalQueryCountOutputTag)
        .ReduceBy({"MainHost", "Host"})
        .Do();

    // Count is written negated by TRivalQueriesCountReduce, so sorting by it presumably
    // puts the hosts with the largest totals first within each MainHost.
    TSortCmd<NProto::TRivalHostQueryCount>(tx, TTable<NProto::TRivalHostQueryCount>(tx, config.TABLE_TMP_RIVAL_QUERY_COUNT + "0" + tableSuffix))
        .By({"MainHost", "Count", "Host"})
        .Do();

    // Pick the 20 rivals with the largest number of queries.
    TReduceCmd<TRivalQueriesLimitReduce>(tx, new TRivalQueriesLimitReduce)
        .Input(TTable<NProto::TRivalHostQueryCount>(tx, DebugPath(config.TABLE_TMP_RIVAL_QUERY_COUNT + "0" + tableSuffix)), RivalQueryCountInputTag)
        .Output(TTable<NProto::TRivalHostQueryCount>(tx, config.TABLE_TMP_RIVAL_QUERY_COUNT + "1" + tableSuffix), RivalQueryCountOutputTag)
        .ReduceBy({"MainHost"})
        .SortBy({"MainHost", "Count"})
        .Do();

    TSortCmd<NProto::TRivalHostQueryCount>(tx, TTable<NProto::TRivalHostQueryCount>(tx, config.TABLE_TMP_RIVAL_QUERY_COUNT + "1" + tableSuffix))
        .By({"MainHost", "Host"})
        .Do();

    LOG_INFO("Filter user session by rivals queries.");
    // Drop the sessions whose host is not in the rival list and build the list of selected rivals.
    TReduceCmd<TRivalSessionQueryJoin>(tx)
        .Input(TTable<NProto::TRivalHostQueryCount>(tx, DebugPath(config.TABLE_TMP_RIVAL_QUERY_COUNT + "1" + tableSuffix)), RivalQueryCountInputTag)
        .Input(TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "0"), UserSessionCrossQueryInputTag)
        .Output(TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "1" + tableSuffix), UserSessionCrossQueryOutputTag)
        .Output(TTable<NProto::TSelectedRival>(tx, config.TABLE_TMP_SELECTED_RIVAL + tableSuffix).AsSortedOutput({"Host", "Rival"}), SelectedRivalOutputTag)
        .ReduceBy({"MainHost", "Host"})
        .Do();

    LOG_INFO("Prepare data for rival queries report");
    // Prepare the data for the niche report.
    TSortCmd<NProto::TUserSessionIntersecQuery>(tx, TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "1" + tableSuffix))
        .By({"MainHost", "RegionId", "IsMobile", "IsPad", "QuerySource", "Host"})
        .Do();
    // Build the data for the niche report.

    LOG_INFO("Prepare Niche Rival report.");
    TReduceCmd<TPrepareUserSessionReportReducer>(tx, new TPrepareUserSessionReportReducer(localWordstat))
        .Input(TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_TOTAL_POPULARITY + "1"), HostQueryPopularityInputTag)
        .Input(TTable<NProto::TUserSessionIntersecQuery>(tx, DebugPath(config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "1" + tableSuffix)), UserSessionCrossQueryInputTag)
        .Output(TTable<NProto::TNicheRivalReport>(tx, config.TABLE_TMP_QUERIES_RIVAL_REPORT + tableSuffix), NicheRivalReportOutputTag)
        .ReduceBy({"MainHost", "RegionId", "IsMobile", "IsPad", "QuerySource"})
        .SortBy({"MainHost", "RegionId", "IsMobile", "IsPad", "QuerySource", "Host"})
        .Do();
    TSortCmd<NProto::TNicheRivalReport>(tx, TTable<NProto::TNicheRivalReport>(tx, config.TABLE_TMP_QUERIES_RIVAL_REPORT + tableSuffix))
        .By({"Host", "Rival", "RegionId", "IsMobile", "IsPad", "QuerySource"})
        .Do();

    LOG_INFO("Prepare rival report.");
    // Remove duplicate rivals (the table is rewritten in place: input and output are the same table).
    TReduceCmd<TReduceSelectedRivalReducer>(tx)
        .Input(TTable<NProto::TSelectedRival>(tx, config.TABLE_TMP_SELECTED_RIVAL + tableSuffix), SelectedRivalInputTag)
        .Output(TTable<NProto::TSelectedRival>(tx, config.TABLE_TMP_SELECTED_RIVAL + tableSuffix), SelectedRivalOutputTag)
        .ReduceBy({"Host", "Rival"})
        .Do();

    TSortCmd<NProto::TSelectedRival>(tx, TTable<NProto::TSelectedRival>(tx, config.TABLE_TMP_SELECTED_RIVAL + tableSuffix))
        .By({"Host", "Rival"})
        .Do();

    TSortCmd<NProto::TUserSessionIntersecQuery>(tx, TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "1" + tableSuffix))
        .By({"Host", "Query", "RegionId", "IsMobile", "IsPad", "QuerySource"})
        .Do();
    // Extra pass in case there are duplicated sessions.
    TReduceCmd<TUserSessionQueryRemoveDuplication>(tx)
        .Input(TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "1" + tableSuffix), UserSessionCrossQueryInputTag)
        .Output(TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "2" + tableSuffix), UserSessionCrossQueryOutputTag)
        .ReduceBy({"Host", "Query", "RegionId", "IsMobile", "IsPad", "QuerySource"})
        .Do();
    TSortCmd<NProto::TUserSessionIntersecQuery>(tx, TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "2" + tableSuffix))
        .By({"Host", "Query", "RegionId", "IsMobile", "IsPad", "QuerySource"})
        .Do();

    // Filtered rival sessions; needed to plot a chart for the selected queries and rivals (not done in the MVP).
    tx->Copy(config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "2" + tableSuffix, NYTUtils::JoinPath(config.TABLE_IMPORT_RIVAL_QUERIES + tableSuffix, userSessionDate), NYT::TCopyOptions{}.Force(true));
    // Report used to build the niche chart.
    tx->Copy(config.TABLE_TMP_QUERIES_RIVAL_REPORT + tableSuffix, NYTUtils::JoinPath(config.TABLE_IMPORT_QUERIES_RIVAL_REPORT + tableSuffix, userSessionDate), NYT::TCopyOptions{}.Force(true));
    // List of the hosts we consider rivals.
    tx->Copy(config.TABLE_TMP_SELECTED_RIVAL + tableSuffix, NYTUtils::JoinPath(config.TABLE_IMPORT_RIVAL + tableSuffix, userSessionDate), NYT::TCopyOptions{}.Force(true));
}

int ImportQueriesAndRivals(int, const char**) {
    const auto& config = TConfig::CInstance();
    NYT::IClientBasePtr client = NYT::CreateClient(config.MR_SERVER_HOST);
    auto tx = client->StartTransaction();
    TString lastProcessedUserSessionDate;
    try {
        lastProcessedUserSessionDate = NYTUtils::GetAttr(tx, config.TABLE_USER_SESSION_ROOT, TAttrName::LastMinedUserSession).AsString();
    } catch (yexception &e) {
    }
    LOG_INFO("Last processed table %s", lastProcessedUserSessionDate.c_str());
    TString userSessionTable = config.LoadFirstUnprocessedUserSession(client, config.TABLE_USER_SESSION_ROOT, lastProcessedUserSessionDate);
    if (userSessionTable.Empty()) {
        LOG_INFO("No fresh user sessions found");
        return 0;
    }
    TString userSessionDate = NYTUtils::GetTableName(userSessionTable);

    LOG_INFO("Prepare user session table.");
    //Удаляем дублирующие запросы по одному хосту, и делаем для них avg по позиции, и ClickPositionShare
    TReduceCmd<TUserSessionQueryCountReduce>(tx)
        .Input(TTable<NUserSessions::NProto::TQuery>(tx, DebugPath(userSessionTable)), UserSessionInputTag)
        .Output(TTable<NProto::TUserSessionQueryCountStat>(tx, config.TABLE_TMP_USER_SESSION_QUERY_COUNT + "0").AsSortedOutput({"Host", "Query"}), UserSessionQueryCountOutputTag)
        .ReduceBy({"Host", "CorrectedQuery", "Path", "RegionId", "IsMobile", "IsPad"})
        .Do();

    TSortCmd<NProto::TUserSessionQueryCountStat>(tx, TTable<NProto::TUserSessionQueryCountStat>(tx, config.TABLE_TMP_USER_SESSION_QUERY_COUNT + "0"))
        .By({"Query", "RegionId", "IsMobile", "IsPad", "Host"})
        .Do();

    //Избавляемся от пути(Path), в запросе, и опять делаем средние по позиции и ClickPositionShare
    TReduceCmd<TReduceQueriesReducer>(tx, new TReduceQueriesReducer)
        .Input(TTable<NProto::TUserSessionQueryCountStat>(tx, config.TABLE_TMP_USER_SESSION_QUERY_COUNT + "0"), UserSessionQueryCountInputTag)
        .Output(TTable<NProto::TUserSessionQueryCountStat>(tx, config.TABLE_TMP_USER_SESSION_QUERY_COUNT + "1")
                    .AsSortedOutput({"Query", "RegionId", "IsMobile", "IsPad", "Host"}),
                UserSessionQueryCountOutputTag)
        .ReduceBy({"Query", "RegionId", "IsMobile", "IsPad", "Host"})
        .Do();

    LOG_INFO("Join prepared user session and popularity table.");
    //Добавляем Popularity
    TReduceCmd<TJoinQueriesPopularityReducer>(tx, new TJoinQueriesPopularityReducer)
        .Input(TTable<NProto::TQueryPopularity>(tx, config.TABLE_TMP_POPULARITY), PopularityInputTag)
        .Input(TTable<NProto::TUserSessionQueryCountStat>(tx, DebugPath(config.TABLE_TMP_USER_SESSION_QUERY_COUNT + "1")), UserSessionQueryCountInputTag)
        .Output(TTable<NProto::TUserSessionQueryCountStat>(tx, config.TABLE_TMP_USER_SESSION_QUERY_COUNT + "2")
                    .AsSortedOutput({"Query", "RegionId", "IsMobile", "IsPad", "Host"}),
                UserSessionQueryCountOutputTag)
        .ReduceBy({"Query", "RegionId", "IsMobile", "IsPad"})
        .Do();

    LOG_INFO("CrossJoin queries reduce.");
    //Берем 40k запросов из ранне намайненой ниши(берем топ), и делаем CrossJoin с текущий пользовательской сессией
    //Нужно что бы потом намайнить конкурентов исходя из ниши.
    TReduceCmd<TQueriesCrossJoinReduce>(tx, new TQueriesCrossJoinReduce)
        .Input(TTable<NProto::THostQuery>(tx, DebugPath(config.TABLE_TMP_HOST_QUERY)), TQueriesSourceInputTag)
        .Input(TTable<NProto::TUserSessionQueryCountStat>(tx, config.TABLE_TMP_USER_SESSION_QUERY_COUNT + "2"), UserSessionQueryCountInputTag)
        .Output(TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "0"), UserSessionCrossQueryOutputTag)
        .ReduceBy({"Query"})
        .Do();
    TSortCmd<NProto::TUserSessionIntersecQuery>(tx, TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_CROSS_SESSION_QUERY_INFO + "0"))
        .By({"MainHost", "Host"})
        .Do();

    LOG_INFO("Calc main host queries total popularity");
    TSortCmd<NProto::THostQueryImport>(tx)
        .Input(TTable<NProto::THostQueryImport>(tx, config.TABLE_TMP_HOST_QUERY_IMPORT))
        .Output(TTable<NProto::THostQueryImport>(tx, config.TABLE_TMP_TOTAL_POPULARITY + "0"))
        .By({"Host", "RegionId", "IsMobile", "IsPad", "SourceFlag"})
        .Do();
    TReduceCmd<TCalcTotalHostQueryPopularity>(tx)
        .Input(TTable<NProto::THostQueryImport>(tx, config.TABLE_TMP_TOTAL_POPULARITY + "0"))
        .Output(TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_TOTAL_POPULARITY + "1"))
        .ReduceBy({"Host", "RegionId", "IsMobile", "IsPad", "SourceFlag"})
        .Do();

    TSortCmd<NProto::TUserSessionIntersecQuery>(tx, TTable<NProto::TUserSessionIntersecQuery>(tx, config.TABLE_TMP_TOTAL_POPULARITY + "1"))
        .By({"MainHost", "RegionId", "IsMobile", "IsPad", "QuerySource", "Host"})
        .Do();

    LOG_INFO("Prepare rivals.");
    // глобальный вордстат
    auto prepareNicheReport = [&] {
        PrepareNicheReport(tx, userSessionDate, false);
    };
    // локальный вордстат (пока отключен)
    /*auto prepareLocalNicheReport = [&] {
        PrepareNicheReport(tx, userSessionDate, true);
    };*/
    NUtils::RunAsync(prepareNicheReport);

    NYTUtils::SetAttr(tx, config.TABLE_USER_SESSION_ROOT, TAttrName::LastMinedUserSession, userSessionDate);
    tx->Commit();
    return 0;
}

}
}
