#include <util/datetime/cputimer.h>
#include <util/draft/datetime.h>
#include <util/generic/deque.h>
#include <util/generic/hash_set.h>
#include <util/generic/size_literals.h>
#include <util/generic/vector.h>
#include <util/string/reverse.h>

#include <library/cpp/containers/comptrie/comptrie.h>
#include <library/cpp/containers/comptrie/prefix_iterator.h>
//#include <library/cpp/dot_product/dot_product.h>
#include <library/cpp/l2_distance/l2_distance.h>
#include <library/cpp/string_utils/url/url.h>

#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <robot/library/dssm/utils/title_normalization.h>
#include <robot/jupiter/protos/export.pb.h>
#include <robot/jupiter/protos/external/host_mirror.pb.h>
#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/library/dssm/dssm_utils.h>
#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <robot/library/dssm/utils/title_normalization.h>
#include <wmconsole/version3/processors/user_sessions/exports/catalogia/protos/catalogia.pb.h>
#include <wmconsole/version3/processors/user_sessions/library/utils.h>
#include <wmconsole/version3/processors/user_sessions/protos/user_sessions.pb.h>
#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/math.h>
#include <wmconsole/version3/wmcutil/owners.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include "config.h"
#include "utils.h"

#include "task_user_sessions.h"

namespace NWebmaster {
namespace NCatalogia {

using namespace NJupiter;

// Table tags used by the tagged reducers in this file. Each tag pairs a row message
// type with a numeric id; the tags are handed to TReduceCmd::Input()/Output() when an
// operation is configured and to reader.GetRowMaybe()/GetRows()/SkipRows() and
// writer.AddRow() inside the reducers.
// NOTE(review): the exact meaning of the numeric id is defined by TInputTag/TOutputTag
// (robot/library/yt/static/tags.h), which is not visible here.
static const TInputTag<NProto::TCatalogiaFilter> FilterInputTag                     (1);
static const TInputTag<NJupiter::TContentAttrsForWebmaster> ContentAttrsInputTag    (2);
static const TInputTag<NProto::TCatalogiaQueryIntm> CatalogiaIntmInputTag           (3);
static const TOutputTag<NProto::TCatalogiaQueryIntm> CatalogiaIntmOutputTag         (1);
// The two output tags below are not referenced in the visible part of this file.
static const TOutputTag<NProto::TCatalogiaQuery> CatalogiaOutputTag                 (2);
static const TOutputTag<NProto::TCatalogiaFilter> FilterOutputTag                   (3);

// Value passed to OperationWeight() of the YT commands built in ProcessUserSessions.
const float OPERATION_WEIGHT = 5.0;

// Column list of the NUserSessions::NProto::TQuery input tables that the pipeline
// needs; ProcessUserSessions passes it to SelectFields() for every input table.
// The list is built once on first call and shared afterwards.
const NYT::TSortColumns & FIELDS_URLS_QUERIES() {
    static const NYT::TSortColumns selectedColumns = {
        // Key-like columns.
        "Host",
        "CorrectedQuery",
        "Path",
        // Per-row statistics and attributes.
        "Clicks",
        "IsMetrikaRobot",
        "IsMobile",
        "IsPad",
        "Position",
        "RegionId",
        "RequestSource",
        "ResultSource",
        "Shows",
        "Timestamp",
        // Ranking factors copied through to the intermediate rows.
        "PrsLogL2Relevance",
        "UpperQueryNavPred",
        "UpperPornoUpperPl",
        "UpperDocNavPred0",
    };
    return selectedColumns;
}

// Mapper over parsed user-session rows (NUserSessions::NProto::TQuery).
//
// A row is kept only if it passes the mode-specific visibility predicate, its host
// matches at least one domain stored in the trie, and the host's Metrika owner is not
// in the skip list. For every kept row one TCatalogiaQueryIntm row is emitted per
// matched domain; the host is replaced by its main mirror when MirrorsMap knows one,
// and the path is canonized through TRobotsCanonizerV2.
//
// MirrorsMap, TrieStream, RobotsMap and Mode are the job state listed in
// Y_SAVELOAD_JOB; Trie is rebuilt from TrieStream in Start().
struct TQueriesMapper : public NYT::IMapper<NYT::TTableReader<NUserSessions::NProto::TQuery>, NYT::TTableWriter<NProto::TCatalogiaQueryIntm>> {
    Y_SAVELOAD_JOB(MirrorsMap, TrieStream, RobotsMap, Mode)

    // Selects which visibility predicate is applied to the input rows.
    enum EParseMode {
        E_MODE_QUERIES,
        E_MODE_PRSLOG,
    };

public:
    TQueriesMapper() = default;

    // mirrorsMap  - host -> main mirror host.
    // trieStream  - serialized TCompactTrie<char> with the domains of interest.
    // robotsMap   - data handed to every TRobotsCanonizerV2 instance.
    // mode        - which visibility predicate to use.
    // All arguments are copied into the mapper.
    TQueriesMapper(const THashMap<TString, TString> &mirrorsMap, const TVector<char> &trieStream, THashMap<TString, TString> &robotsMap, EParseMode mode)
        : MirrorsMap(mirrorsMap)
        , TrieStream(trieStream)
        , RobotsMap(robotsMap)
        , Mode(mode)
    {
    }

public:
    // Builds the trie view over the serialized stream before any rows are processed.
    void Start(TWriter* /*writer*/) override {
        // data() rather than &TrieStream[0]: indexing an empty vector is undefined behavior.
        Trie.Init(TrieStream.data(), TrieStream.size());
    }

    void Do(TReader *input, TWriter *output) override {
        static const THashSet<TString> OWNERS_SKIP_LIST = {
            "yandex.by",
            "yandex.com",
            "yandex.com.tr",
            "yandex.kz",
            "yandex.ru",
            "yandex.ua",
        };

        // The canonizer is recreated every time the main mirror changes.
        THolder<TRobotsCanonizerV2> canonizer;
        TString canonizingHost;

        NProto::TCatalogiaQueryIntm dstMsg;
        for (; input->IsValid(); input->Next()) {
            const auto &row = input->GetRow();
            switch (Mode) {
                case E_MODE_QUERIES:
                    if (!NUserSessions::IsVisibleQueryInWebmaster(row)) {
                        continue;
                    }
                    break;
                case E_MODE_PRSLOG:
                    if (!NUserSessions::IsVisiblePrsLogQuery(row)) {
                        continue;
                    }
                    break;
                default:
                    ythrow yexception() << "unknown mode";
            }

            const TString &host = row.GetHost();
            TString rhost = host;
            ReverseInPlace(rhost);

            // Every trie prefix of the reversed host corresponds to a suffix of the host;
            // a suffix is accepted as a matching domain only if IsSubdomain() agrees.
            THashSet<TString> owners;
            for (auto it = MakePrefixIterator(Trie, rhost.data(), rhost.size()); it; ++it) {
                const TString owner = host.substr(host.size() - it.GetPrefixLen());

                if (NUtils::IsSubdomain(host, owner)) {
                    owners.insert(owner);
                }
            }

            if (owners.empty()) {
                continue;
            }

            const TStringBuf owner = NUtils::GetMetrikaOwner(host);
            if (OWNERS_SKIP_LIST.contains(owner)) {
                continue;
            }

            TString mainMirror = host;
            if (const auto it = MirrorsMap.find(host)) {
                mainMirror = it->second;
            }

            const TString mainUrl = TString::Join(mainMirror, row.GetPath());
            // The null check covers the first row when mainMirror happens to equal the
            // initial (empty) canonizingHost; without it the canonizer would be
            // dereferenced while still unset.
            if (!canonizer || mainMirror != canonizingHost) {
                canonizer.Reset(new TRobotsCanonizerV2(RobotsMap));
                canonizingHost = mainMirror;
            }

            const TString canonizedPath = canonizer->GetCanonizedPath(mainUrl, mainMirror, row.GetPath());
            const TUrlId urlId = GetUrlId(mainMirror, canonizedPath);
            const TQueryId queryId = GetQueryId(row.GetCorrectedQuery());

            dstMsg.SetHost(mainMirror);
            dstMsg.SetPath(canonizedPath);
            dstMsg.SetUrlId(urlId);
            dstMsg.SetQuery(row.GetCorrectedQuery());
            dstMsg.SetQueryId(queryId);
            dstMsg.SetRegionId(row.GetRegionId());
            dstMsg.SetPosition(row.GetPosition());
            dstMsg.SetClicks(row.GetClicks());
            dstMsg.SetShows(row.GetShows());
            // Both timestamps start from the row timestamp; later stages take max/min over them.
            dstMsg.SetUrlMaxTimestamp(row.GetTimestamp());
            dstMsg.SetQueryMinTimestamp(row.GetTimestamp());
            dstMsg.SetPrsLogL2Relevance(row.GetPrsLogL2Relevance());
            dstMsg.SetUpperQueryNavPred(row.GetUpperQueryNavPred());
            dstMsg.SetUpperPornoUpperPl(row.GetUpperPornoUpperPl());
            dstMsg.SetUpperDocNavPred0(row.GetUpperDocNavPred0());

            // Same payload, one output row per matched domain.
            for (const TString &matchedDomain : owners) {
                dstMsg.SetDomain(matchedDomain);
                output->AddRow(dstMsg);
            }
        }
    }

public:
    THashMap<TString, TString> MirrorsMap;
    TVector<char> TrieStream;
    TCompactTrie<char> Trie;
    THashMap<TString, TString> RobotsMap;
    // Initialized so that a default-constructed mapper never holds an indeterminate value.
    EParseMode Mode = E_MODE_QUERIES;
};

REGISTER_MAPPER(TQueriesMapper)

//ReduceBy Domain, Host, Path, Query, RegionId, Position
// Folds all rows of one key into a single row: clicks and shows are summed,
// UrlMaxTimestamp becomes the maximum and QueryMinTimestamp the minimum over the
// group. All remaining fields are taken from the first row of the group.
struct TQueriesReducer : public NYT::IReducer<NYT::TTableReader<NProto::TCatalogiaQueryIntm>, NYT::TTableWriter<NProto::TCatalogiaQueryIntm>> {
    void Do(TReader *input, TWriter *output) override {
        auto aggregated = input->GetRow();

        size_t latestUrlTs = aggregated.GetUrlMaxTimestamp();
        size_t earliestQueryTs = aggregated.GetQueryMinTimestamp();
        size_t totalClicks = 0;
        size_t totalShows = 0;

        // The first row is visited again here, so its clicks/shows are counted once.
        while (input->IsValid()) {
            const auto &current = input->GetRow();
            totalShows += current.GetShows();
            totalClicks += current.GetClicks();
            earliestQueryTs = Min<size_t>(earliestQueryTs, current.GetQueryMinTimestamp());
            latestUrlTs = Max<size_t>(latestUrlTs, current.GetUrlMaxTimestamp());
            input->Next();
        }

        aggregated.SetClicks(totalClicks);
        aggregated.SetShows(totalShows);
        aggregated.SetUrlMaxTimestamp(latestUrlTs);
        aggregated.SetQueryMinTimestamp(earliestQueryTs);
        output->AddRow(aggregated);
    }
};

REGISTER_REDUCER(TQueriesReducer)

// Pre-aggregates intermediate query rows per (Host, Path): for each URL it emits a
// TCatalogiaFilter row with the maximum UrlMaxTimestamp seen and the number of input
// rows counted. A URL may be emitted more than once (by different jobs, or by the same
// job after a flush); TUrlsFilterReducer merges such partial rows by taking the max
// timestamp and summing Records.
struct TUrlsFilterMapper : public NYT::IMapper<NYT::TTableReader<NProto::TCatalogiaQueryIntm>, NYT::TTableWriter<NProto::TCatalogiaFilter>> {
    struct TCounter {
        size_t UrlMaxTimestamp = 0;
        size_t Records = 0;
    };

    // host -> path -> counter
    using TCounters = THashMap<TString, THashMap<TString, TCounter>>;

    // Writes one output row per buffered (Host, Path) and empties the buffer.
    void FlushCounters(TCounters &counters, TWriter *output) {
        NProto::TCatalogiaFilter dstMsg;
        for (const auto &hostObj : counters) {
            dstMsg.SetHost(hostObj.first);
            for (const auto &pathObj : hostObj.second) {
                dstMsg.SetPath(pathObj.first);
                const TCounter &counter = pathObj.second;
                dstMsg.SetUrlMaxTimestamp(counter.UrlMaxTimestamp);
                dstMsg.SetRecords(counter.Records);
                output->AddRow(dstMsg);
            }
        }
        counters.clear();
    }

    void Do(TReader *input, TWriter *output) override {
        // Upper bound on buffered URLs, so memory use does not grow with the size of
        // the job input. Same value as the threshold used by TQueriesFilterMapper.
        const size_t FLUSH_THRESHOLD = 500000;

        TCounters counters;
        size_t bufferedUrls = 0;
        for (; input->IsValid(); input->Next()) {
            const auto &row = input->GetRow();
            TCounter &counter = counters[row.GetHost()][row.GetPath()];
            if (counter.Records == 0) {
                // First time this (Host, Path) is seen since the last flush.
                ++bufferedUrls;
            }
            counter.UrlMaxTimestamp = Max<size_t>(counter.UrlMaxTimestamp, row.GetUrlMaxTimestamp());
            counter.Records++;

            if (bufferedUrls > FLUSH_THRESHOLD) {
                // `counter` is invalidated by the flush and must not be used after it.
                FlushCounters(counters, output);
                bufferedUrls = 0;
            }
        }

        FlushCounters(counters, output);
    }
};

REGISTER_MAPPER(TUrlsFilterMapper)

//ReduceBy Host, Path
// Merges the filter rows of one URL: the result carries the largest UrlMaxTimestamp
// of the group and the sum of Records, where a row whose Records is 0 contributes 1.
// Other fields come from the first row of the group.
struct TUrlsFilterReducer : public NYT::IReducer<NYT::TTableReader<NProto::TCatalogiaFilter>, NYT::TTableWriter<NProto::TCatalogiaFilter>> {
    void Do(TReader *input, TWriter *output) override {
        auto merged = input->GetRow();

        size_t totalRecords = 0;
        size_t newestTimestamp = merged.GetUrlMaxTimestamp();
        while (input->IsValid()) {
            const auto &part = input->GetRow();
            totalRecords += Max<size_t>(part.GetRecords(), 1);
            newestTimestamp = Max<size_t>(newestTimestamp, part.GetUrlMaxTimestamp());
            input->Next();
        }

        merged.SetRecords(totalRecords);
        merged.SetUrlMaxTimestamp(newestTimestamp);
        output->AddRow(merged);
    }
};

REGISTER_REDUCER(TUrlsFilterReducer)

// Pre-aggregates intermediate query rows per Query text: for each query it emits a
// TCatalogiaFilter row with the minimum QueryMinTimestamp seen and the number of rows
// counted. The in-memory buffer is written out and emptied whenever it grows past a
// fixed number of distinct queries, so one query can be emitted several times.
struct TQueriesFilterMapper : public NYT::IMapper<NYT::TTableReader<NProto::TCatalogiaQueryIntm>, NYT::TTableWriter<NProto::TCatalogiaFilter>> {
    struct TCounter {
        // Starts at the largest size_t so that the first Min() takes the row's value.
        size_t QueryMinTimestamp = Max<size_t>();
        size_t Records = 0;
    };

    // Emits one row per buffered query, then clears the buffer.
    void FlushCounters(THashMap<TString, TCounter> &counters, TWriter *output) {
        NProto::TCatalogiaFilter filterRow;
        for (auto it = counters.begin(); it != counters.end(); ++it) {
            const TCounter &stats = it->second;
            filterRow.SetQuery(it->first);
            filterRow.SetQueryMinTimestamp(stats.QueryMinTimestamp);
            filterRow.SetRecords(stats.Records);
            output->AddRow(filterRow);
        }
        counters.clear();
    }

    void Do(TReader *input, TWriter *output) override {
        const size_t maxBufferedQueries = 500000;

        THashMap<TString, TCounter> perQuery;
        while (input->IsValid()) {
            const auto &current = input->GetRow();

            TCounter &stats = perQuery[current.GetQuery()];
            stats.QueryMinTimestamp = Min<size_t>(stats.QueryMinTimestamp, current.GetQueryMinTimestamp());
            ++stats.Records;

            // `stats` is not touched after a flush, which invalidates it.
            if (perQuery.size() > maxBufferedQueries) {
                FlushCounters(perQuery, output);
            }

            input->Next();
        }

        // Write whatever is still buffered.
        FlushCounters(perQuery, output);
    }
};

REGISTER_MAPPER(TQueriesFilterMapper)

//ReduceBy Query
// Merges the filter rows of one query: the result carries the smallest
// QueryMinTimestamp of the group and the sum of Records, where a row whose Records
// is 0 contributes 1. Other fields come from the first row of the group.
struct TQueriesFilterReducer : public NYT::IReducer<NYT::TTableReader<NProto::TCatalogiaFilter>, NYT::TTableWriter<NProto::TCatalogiaFilter>> {
    void Do(TReader *input, TWriter *output) override {
        auto merged = input->GetRow();

        size_t totalRecords = 0;
        size_t oldestTimestamp = merged.GetQueryMinTimestamp();
        while (input->IsValid()) {
            const auto &part = input->GetRow();
            totalRecords += Max<size_t>(part.GetRecords(), 1);
            oldestTimestamp = Min<size_t>(oldestTimestamp, part.GetQueryMinTimestamp());
            input->Next();
        }

        merged.SetRecords(totalRecords);
        merged.SetQueryMinTimestamp(oldestTimestamp);
        output->AddRow(merged);
    }
};

REGISTER_REDUCER(TQueriesFilterReducer)

//ReduceBy Query
// Joins the per-query filter row with the intermediate rows of the same query:
// every intermediate row gets the query's minimum timestamp and the query age in
// seconds (relative to the time this reducer object was constructed) and is written
// to the intermediate output. Groups without a filter row, or without any rows after
// the filter rows, produce no output.
struct TJoinQueryContentReducer : public TTaggedReducer {
    TJoinQueryContentReducer()
        : NowTimestamp(Now().Seconds())
    {
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        TMaybe<NProto::TCatalogiaFilter> filter = reader.GetRowMaybe(FilterInputTag);
        reader.SkipRows(FilterInputTag);
        if (!reader.IsValid() || !filter.Defined()) {
            return;
        }

        const ui32 queryMinTimestamp = filter.GetRef().GetQueryMinTimestamp();
        // Clamp at zero: an unsigned subtraction would wrap to a huge age if the
        // timestamp is ahead of this job's clock.
        const ui32 queryAgeSeconds = NowTimestamp > queryMinTimestamp ? NowTimestamp - queryMinTimestamp : 0;

        // Rows are taken by value because they are modified before being written.
        for (auto row : reader.GetRows(CatalogiaIntmInputTag)) {
            row.SetQueryAgeSeconds(queryAgeSeconds);
            row.SetQueryMinTimestamp(queryMinTimestamp);
            writer.AddRow(row, CatalogiaIntmOutputTag);
        }
    }

public:
    // Seconds since epoch captured in the constructor; not part of any saved job state
    // in this struct.
    const ui32 NowTimestamp = 0;
};

REGISTER_REDUCER(TJoinQueryContentReducer)

//ReduceBy Host, Path
// Joins, per URL, the URL filter row, the optional Jupiter content attributes and the
// intermediate query rows. Every written row gets the page title and meta description
// (empty when no content attributes exist), the URL's max timestamp and age, and,
// when the DSSM models could be evaluated for the row's query, three DSSM scores plus
// their squared L2 distance to the all-ones vector.
//
// URLs without a filter row, URLs whose filter row reports more than
// MAX_RECORDS_PER_URL records, and groups with no intermediate rows produce no output.
struct TJoinUrlContentReducer : public TTaggedReducer {
    TJoinUrlContentReducer()
        : NowTimestamp(Now().Seconds())
    {
    }

    // Creates the three DSSM wrappers on top of one shared TDssmL3Model instance.
    void StartTagged(TTagedWriter) override {
        TDssmL3Model::Ptr dssmL3Model(new TDssmL3Model);
        DssmCtr.Reset(new TDssmCtr(dssmL3Model));
        DssmCtrNoMiner.Reset(new TDssmCtrNoMiner(dssmL3Model));
        DssmUta.Reset(new TDssmUta(dssmL3Model));
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        const size_t MAX_RECORDS_PER_URL = 5000000;

        TMaybe<NProto::TCatalogiaFilter> filter = reader.GetRowMaybe(FilterInputTag);
        reader.SkipRows(FilterInputTag);
        if (!reader.IsValid() || !filter.Defined()) {
            return;
        }

        if (filter.GetRef().GetRecords() > MAX_RECORDS_PER_URL) {
            return;
        }

        // Content attributes are optional: a URL without them is still processed.
        TMaybe<NJupiter::TContentAttrsForWebmaster> contentAttrs = reader.GetRowMaybe(ContentAttrsInputTag);
        reader.SkipRows(ContentAttrsInputTag);
        if (!reader.IsValid()) {
            return;
        }

        TString title, metaDescription;
        if (contentAttrs.Defined()) {
            title = contentAttrs.GetRef().GetTitleRawUTF8();
            metaDescription = contentAttrs.GetRef().GetMetaDescription();
        }

        TMaybe<NProto::TCatalogiaQueryIntm> firstRow = reader.GetRowMaybe(CatalogiaIntmInputTag);

        const TString url = firstRow.GetRef().GetHost() + firstRow.GetRef().GetPath();
        const TString urlUta = TUrlUTA::CInstance().Get(url);
        const TString normalizedTitle = NDssm::NormalizeTitleUtf8(title);
        const ui32 urlMaxTimestamp = filter.GetRef().GetUrlMaxTimestamp();
        // Clamp at zero: an unsigned subtraction would wrap to a huge age if the
        // timestamp is ahead of this job's clock.
        const ui32 urlAgeSeconds = NowTimestamp > urlMaxTimestamp ? NowTimestamp - urlMaxTimestamp : 0;

        TVector<float> docEmbeddingCtr;
        TVector<float> docEmbeddingCtrNoMiner;
        TVector<float> docEmbeddingUta;

        TVector<float> queryEmbeddingCtr;
        TVector<float> queryEmbeddingCtrNoMiner;
        TVector<float> queryEmbeddingUta;

        // Document embeddings depend only on the URL and title, so they are computed
        // once per group.
        DssmCtr->GetDocEmbedding(url, urlUta, normalizedTitle, docEmbeddingCtr);
        DssmCtrNoMiner->GetDocEmbedding(url, normalizedTitle, docEmbeddingCtrNoMiner);
        DssmUta->GetDocEmbedding(url, urlUta, normalizedTitle, docEmbeddingUta);

        const float dssmTopValues[3] = {1.0, 1.0, 1.0};
        const auto RoundD3 = NUtils::Round<-3, double>;

        float ctrJointOutput = 0;
        float ctrNoMinerJointOutput = 0;
        float utaJointOutput = 0;
        float topL2 = 0;
        float dssmValues[3] = {0, 0, 0};

        // hasCurrentQuery makes the first comparison well defined; previously
        // currentQueryId was read before it had been assigned.
        TQueryId currentQueryId{};
        bool hasCurrentQuery = false;
        bool dssmValid = false;
        for (auto row : reader.GetRows(CatalogiaIntmInputTag)) {
            // Scores are recomputed only when the query id changes between consecutive rows.
            if (!hasCurrentQuery || currentQueryId != row.GetQueryId()) {
                currentQueryId = row.GetQueryId();
                hasCurrentQuery = true;
                try {
                    const TString normalizedQuery = FastNormalizeRequest(row.GetQuery(), false);
                    DssmCtr->GetQueryEmbedding(normalizedQuery, queryEmbeddingCtr);
                    DssmCtrNoMiner->GetQueryEmbedding(normalizedQuery, queryEmbeddingCtrNoMiner);
                    DssmUta->GetQueryEmbedding(normalizedQuery, queryEmbeddingUta);

                    ctrJointOutput = SoftSign(DssmCtr->GetJointOutput(docEmbeddingCtr, queryEmbeddingCtr));
                    ctrNoMinerJointOutput = SoftSign(DssmCtrNoMiner->GetJointOutput(docEmbeddingCtrNoMiner, queryEmbeddingCtrNoMiner));
                    utaJointOutput = SoftSign(DssmUta->GetJointOutput(docEmbeddingUta, queryEmbeddingUta));

                    dssmValues[0] = ctrJointOutput;
                    dssmValues[1] = ctrNoMinerJointOutput;
                    dssmValues[2] = utaJointOutput;
                    topL2 = L2SqrDistance(dssmValues, dssmTopValues, 3);
                    dssmValid = true;
                } catch (const yexception &) {
                    dssmValid = false;
                    // NOTE(review): this skips only the first row of a failing query;
                    // following rows with the same query id are still written, without
                    // DSSM fields. Confirm whether all of them should be dropped or kept.
                    continue;
                }
            }

            row.SetTitle(title);
            row.SetMetaDescription(metaDescription);
            row.SetUrlAgeSeconds(urlAgeSeconds);
            row.SetUrlMaxTimestamp(urlMaxTimestamp);
            if (dssmValid) {
                row.SetDssmCtr(RoundD3(ctrJointOutput));
                row.SetDssmCtrNoMiner(RoundD3(ctrNoMinerJointOutput));
                row.SetDssmUta(RoundD3(utaJointOutput));
                row.SetDssmTopL2(RoundD3(topL2));
            }
            writer.AddRow(row, CatalogiaIntmOutputTag);
        }
    }

public:
    // Seconds since epoch captured in the constructor.
    const ui32 NowTimestamp = 0;
    THolder<TDssmCtr> DssmCtr;
    THolder<TDssmCtrNoMiner> DssmCtrNoMiner;
    THolder<TDssmUta> DssmUta;
};

REGISTER_REDUCER(TJoinUrlContentReducer)

// Table locations for one run of ProcessUserSessions.
//   Inputs        - source tables to read.
//   Temp          - intermediate table, "<sourceRoot>/intm".
//   QueriesFilter - per-query filter table, "<sourceRoot>/queries-flt".
//   UrlsFilter    - per-URL filter table, "<sourceRoot>/urls-flt".
//   Output        - final export table.
struct TUserSessionTablesConfig {
    TUserSessionTablesConfig(
        const TDeque<NYTUtils::TTableInfo> &inputTables,
        const TString &sourceRoot,
        const TString &exportTable
    )
        : Inputs(inputTables)
        , Temp(NYTUtils::JoinPath(sourceRoot, "intm"))
        , QueriesFilter(NYTUtils::JoinPath(sourceRoot, "queries-flt"))
        , UrlsFilter(NYTUtils::JoinPath(sourceRoot, "urls-flt"))
        , Output(exportTable)
    {
    }

public:
    // Held by value, like the other members, so the config stays valid even when it
    // outlives the container it was built from (a reference member would dangle if
    // the constructor were given a temporary).
    const TDeque<NYTUtils::TTableInfo> Inputs;
    const TString Temp;
    const TString QueriesFilter;
    const TString UrlsFilter;
    const TString Output;
};

// Runs the whole user-sessions export inside a single transaction started on
// clientMain and commits it at the end.
//
//   clientMain - YT client for the main cluster; owns the transaction and is used to
//                load the robots and mirrors maps.
//   tablesCfg  - input, intermediate, filter and output table paths.
//   mode       - visibility predicate selection forwarded to TQueriesMapper.
//
// Stages, in order:
//   1. map + combine + reduce of the input tables into tablesCfg.Temp;
//   2. in one DoParallel call: sort Temp by Query, build the per-query filter table,
//      build the per-URL filter table;
//   3. in one DoParallel call: sort both filter tables;
//   4. join Temp with the query filter (ReduceBy Query), writing back to Temp;
//   5. sort Temp by Host, Path, Domain, Query;
//   6. join Temp with the URL filter and Jupiter content attributes (ReduceBy Host,
//      Path) into tablesCfg.Output;
//   7. sort Output, set its UploadTime attribute, commit.
// NOTE(review): DoParallel and the T*Cmd builders are defined elsewhere; presumably
// DoParallel executes its commands concurrently - confirm against its definition.
void ProcessUserSessions(NYT::IClientBasePtr clientMain, const TUserSessionTablesConfig &tablesCfg, TQueriesMapper::EParseMode mode) {
    const auto &cfg = TConfig::CInstance();
    THashSet<TString> domains;
    TVector<char> domainsTrieStream;
    // The domain list is read through a separate client created for cfg.MR_SERVER_HOST_CATALOGIA.
    NYT::IClientPtr clientCatalogia = NYT::CreateClient(cfg.MR_SERVER_HOST_CATALOGIA);
    LoadCatalogiaDomains(clientCatalogia, cfg.TABLE_SOURCE_CATALOGIA_DOMAINS, domains, domainsTrieStream);
    // `domains` is used only for this log line; the mapper receives the trie stream.
    LOG_INFO("user_sessions, domains %lu, trie %lu bytes", domains.size(), domainsTrieStream.size());

    NYT::ITransactionPtr tx = clientMain->StartTransaction();

    // Every input table is read with the column subset from FIELDS_URLS_QUERIES().
    TDeque<TTable<NUserSessions::NProto::TQuery>> inputTables;
    for (auto &table : tablesCfg.Inputs) {
        TTable<NUserSessions::NProto::TQuery> inputTable(tx, table.Name);
        inputTables.push_back(inputTable.SelectFields(FIELDS_URLS_QUERIES()));
        LOG_INFO("user_sessions, input %s", table.Name.c_str());
    }

    LOG_INFO("user_sessions, tmp %s", tablesCfg.Temp.c_str());
    LOG_INFO("user_sessions, output %s", tablesCfg.Output.c_str());

    THashMap<TString, TString> robotsMap;
    LoadCatalogiaRobots(clientMain, cfg.TABLE_CATALOGIA_SOURCE_ROBOTS_FLT, robotsMap);
    LOG_INFO("user_sessions, robots %lu", robotsMap.size());

    THashMap<TString, TString> mirrorsMap;
    LoadCatalogiaMirrors(clientMain, cfg.TABLE_CATALOGIA_SOURCE_MIRRORS_FLT, mirrorsMap);
    LOG_INFO("user_sessions, mirrors %lu", mirrorsMap.size());

    // Stage 1: filter and normalize input rows, then aggregate them per
    // (Domain, Host, Path, Query, RegionId, Position). TQueriesReducer is supplied for
    // both of the remaining command slots.
    TMapCombineReduceCmd<TQueriesMapper, TQueriesReducer, TQueriesReducer>(
        tx,
        new TQueriesMapper(mirrorsMap, domainsTrieStream, robotsMap, mode),
        new TQueriesReducer,
        new TQueriesReducer
    )
        .OperationWeight(OPERATION_WEIGHT)
        .Inputs(inputTables)
        .Output(TTable<NProto::TCatalogiaQueryIntm>(tx, tablesCfg.Temp))
        .MapperMemoryLimit(1_GBs)
        .MapperCpuLimit(0.2)
        .ReducerCpuLimit(0.2)
        .ReduceBy({"Domain", "Host", "Path", "Query", "RegionId", "Position"})
        .Do()
    ;

    // Stage 2: sort Temp by Query and build both filter tables from Temp. The filter
    // builders read only the columns they aggregate.
    DoParallel(
        TSortCmd<NProto::TCatalogiaQueryIntm>(tx, TTable<NProto::TCatalogiaQueryIntm>(tx, tablesCfg.Temp))
            .OperationWeight(OPERATION_WEIGHT)
            .By({"Query"}),
        TMapCombineReduceCmd<TQueriesFilterMapper, TQueriesFilterReducer, TQueriesFilterReducer>(tx)
            .OperationWeight(OPERATION_WEIGHT)
            .Input(TTable<NProto::TCatalogiaQueryIntm>(tx, tablesCfg.Temp)
                .SelectFields({"Query", "QueryMinTimestamp"})
            )
            .Output(TTable<NProto::TCatalogiaFilter>(tx, tablesCfg.QueriesFilter))
            .ReduceBy({"Query"})
            .MapperCpuLimit(0.2)
            .ReducerCpuLimit(0.2),
        TMapCombineReduceCmd<TUrlsFilterMapper, TUrlsFilterReducer, TUrlsFilterReducer>(tx)
            .OperationWeight(OPERATION_WEIGHT)
            .Input(TTable<NProto::TCatalogiaQueryIntm>(tx, tablesCfg.Temp)
                .SelectFields({"Host", "Path", "UrlMaxTimestamp"})
            )
            .Output(TTable<NProto::TCatalogiaFilter>(tx, tablesCfg.UrlsFilter))
            .ReduceBy({"Host", "Path"})
            .MapperMemoryLimit(3_GBs)
            .MapperCpuLimit(0.2)
            .ReducerCpuLimit(0.2)
    );

    // Stage 3: sort each filter table by the key of the join that consumes it.
    DoParallel(
        TSortCmd<NProto::TCatalogiaFilter>(tx, TTable<NProto::TCatalogiaFilter>(tx, tablesCfg.QueriesFilter))
            .OperationWeight(OPERATION_WEIGHT)
            .By({"Query"}),
        TSortCmd<NProto::TCatalogiaFilter>(tx, TTable<NProto::TCatalogiaFilter>(tx, tablesCfg.UrlsFilter))
            .OperationWeight(OPERATION_WEIGHT)
            .By({"Host", "Path"})
    );

    // Stage 4: attach per-query data. Input and output are the same table path (Temp).
    TReduceCmd<TJoinQueryContentReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NProto::TCatalogiaFilter>(tx, tablesCfg.QueriesFilter), FilterInputTag)
        .Input(TTable<NProto::TCatalogiaQueryIntm>(tx, tablesCfg.Temp), CatalogiaIntmInputTag)
        .Output(TTable<NProto::TCatalogiaQueryIntm>(tx, tablesCfg.Temp), CatalogiaIntmOutputTag)
        .ReduceBy({"Query"})
        .CpuLimit(0.2)
        .Do()
    ;

    // Stage 5: re-sort Temp for the URL join below.
    TSortCmd<NProto::TCatalogiaQueryIntm>(tx, TTable<NProto::TCatalogiaQueryIntm>(tx, tablesCfg.Temp))
        .OperationWeight(OPERATION_WEIGHT)
        .By({"Host", "Path", "Domain", "Query"})
        .Do()
    ;

    // Stage 6: attach per-URL data and DSSM scores. The DSSM model file is shipped to
    // the jobs via AddYtFile.
    TReduceCmd<TJoinUrlContentReducer>(tx)
        .OperationWeight(OPERATION_WEIGHT)
        .Input(TTable<NProto::TCatalogiaFilter>(tx, tablesCfg.UrlsFilter), FilterInputTag)
        .Input(TTable<NJupiter::TContentAttrsForWebmaster>(tx, GetJupiterContentAttrsInProdTable(tx)), ContentAttrsInputTag)
        .Input(TTable<NProto::TCatalogiaQueryIntm>(tx, tablesCfg.Temp), CatalogiaIntmInputTag)
        .Output(TTable<NProto::TCatalogiaQueryIntm>(tx, tablesCfg.Output)
            .AsSortedOutput({"Host", "Path", "Domain", "Query"}), CatalogiaIntmOutputTag
        )
        .ReduceBy({"Host", "Path"})
        .AddYtFile(TCommonYTConfig::CInstance().FILE_L3_MODEL_DSSM)
        .MemoryLimit(2_GBs)
        .Do()
    ;

    // Stage 7: final sort of the export table by the same columns it was declared
    // sorted by above.
    TSortCmd<NProto::TCatalogiaQueryIntm>(tx, TTable<NProto::TCatalogiaQueryIntm>(tx, tablesCfg.Output))
        .OperationWeight(OPERATION_WEIGHT)
        .By({"Host", "Path", "Domain", "Query"})
        .Do()
    ;

    // Stamp the export table with the local time, then publish everything at once.
    SetYtAttr(tx, tablesCfg.Output, TAttrName::UploadTime, Now().ToStringLocalUpToSeconds());
    tx->Commit();
}

int TaskUserSessionsV2(int, const char **) {
    const auto &cfg = TConfig::CInstance();
    TSimpleTimer timer;

    NYT::IClientPtr clientMain = NYT::CreateClient(cfg.MR_SERVER_HOST_MAIN);
    NYTUtils::CreatePath(clientMain, cfg.TABLE_CATALOGIA_SOURCE_ROOT);

    TDeque<NYTUtils::TTableInfo> tables;
    NYTUtils::GetTableList(clientMain, TCommonYTConfigSQ::CInstance().TABLE_PARSED_USER_SESSIONS_DAILY_ROOT, tables);
    std::sort(tables.rbegin(), tables.rend(), NYTUtils::TTableInfo::TNameLess());
    if (tables.size() > cfg.INPUT_DAYS) {
        tables.resize(cfg.INPUT_DAYS);
    }

    TUserSessionTablesConfig tablesCfg(
        tables,
        cfg.TABLE_CATALOGIA_SOURCE_QUERIES_V2_ROOT,
        cfg.TABLE_CATALOGIA_EXPORT_QUERIES_V2
    );

    ProcessUserSessions(clientMain, tablesCfg, TQueriesMapper::E_MODE_QUERIES);

    return 0;
}

int TaskUserSessionsV2Prs(int, const char **) {
    const auto &cfg = TConfig::CInstance();
    TSimpleTimer timer;

    NYT::IClientPtr clientMain = NYT::CreateClient(cfg.MR_SERVER_HOST_MAIN);
    NYTUtils::CreatePath(clientMain, cfg.TABLE_CATALOGIA_SOURCE_ROOT);

    TDeque<NYTUtils::TTableInfo> tables;
    NYTUtils::GetTableList(clientMain, TCommonYTConfigSQ::CInstance().TABLE_PARSED_USER_SESSIONS_PRSLOG_DAILY_ROOT, tables);
    std::sort(tables.rbegin(), tables.rend(), NYTUtils::TTableInfo::TNameLess());
    if (tables.size() > cfg.INPUT_DAYS) {
        tables.resize(cfg.INPUT_DAYS);
    }

    TUserSessionTablesConfig tablesCfg(
        tables,
        cfg.TABLE_CATALOGIA_SOURCE_QUERIES_V2_PRS_ROOT,
        cfg.TABLE_CATALOGIA_EXPORT_QUERIES_V2_PRS
    );

    ProcessUserSessions(clientMain, tablesCfg, TQueriesMapper::E_MODE_PRSLOG);

    return 0;
}

} //namespace NCatalogia
} //namespace NWebmaster
