#include <util/draft/date.h>
#include <util/generic/hash_set.h>
#include <util/generic/set.h>
#include <util/string/join.h>

#include <library/cpp/string_utils/url/url.h>

#include <wmconsole/version3/wmcutil/datetime.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/regex.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include <wmconsole/version3/protos/queries2.pb.h>

#include "config.h"
#include "monitor.h"
#include "queries.h"
#include "week_config.h"

namespace NWebmaster {

namespace {
// Attributes of the favorites source table; their values are turned into
// timestamps with GetPeriodFromTopsAttr() further down in this file.
const char *ATTR_MOST_RECENT_SOURCE_NAME    = "most_recent_source";
const char *ATTR_LEAST_RECENT_SOURCE_NAME   = "least_recent_source";

// Column names used by the jobs in this file. The F_YAMR_* columns are the ones
// read from input rows; the rest are written to (intermediate or final) output rows.
const char *F_CLICKS        = "Clicks";
const char *F_CLICKS_INV    = "ClicksInv";
const char *F_CLICKS_SPLIT  = "ClicksSplit";
const char *F_GROUP_ID      = "GroupId";
const char *F_GROUP_NAME    = "GroupName";
const char *F_PERIOD        = "Period";
const char *F_POSITION      = "Position";
const char *F_SHOWS         = "Shows";
const char *F_SHOWS_INV     = "ShowsInv";
const char *F_HOST          = "Host";
const char *F_PATH          = "Path";
const char *F_YAMR_KEY      = "key";
const char *F_YAMR_SUBKEY   = "subkey";
const char *F_YAMR_VALUE    = "value";

// Keys of the per-indicator sub-nodes: old value, new value and their difference.
const char *F_OLD           = "Old";
const char *F_NEW           = "New";
const char *F_DIFF          = "Diff";
const char *F_DIFF_PERCENT  = "DiffPercent";

// Output columns holding per-query samples (written by TCalcTopQueriesReducer).
const char *F_POS_QUERIES_SHOWS     = "PosQueriesShowsSamples";
const char *F_POS_QUERIES_CLICKS    = "PosQueriesClicksSamples";
const char *F_POS_QUERIES_POSITION  = "PosQueriesPositionSamples";
const char *F_NEG_QUERIES_SHOWS     = "NegQueriesShowsSamples";
const char *F_NEG_QUERIES_CLICKS    = "NegQueriesClicksSamples";
const char *F_NEG_QUERIES_POSITION  = "NegQueriesPositionSamples";

// Output columns holding per-group samples (written by TCalcGroupQueriesReducer).
const char *F_POS_GROUPS_SHOWS      = "PosGroupsShowsSamples";
const char *F_POS_GROUPS_CLICKS     = "PosGroupsClicksSamples";
const char *F_POS_GROUPS_POSITION   = "PosGroupsPositionSamples";
const char *F_NEG_GROUPS_SHOWS      = "NegGroupsShowsSamples";
const char *F_NEG_GROUPS_CLICKS     = "NegGroupsClicksSamples";
const char *F_NEG_GROUPS_POSITION   = "NegGroupsPositionSamples";
}

// Turns a table name into an NYT::TRichYPath that is used as a job input.
// The disabled AddRange() calls are kept as a debugging aid: enabling one
// limits the path to rows with the given exact key.
static NYT::TRichYPath DebugPath(const TString &table) {
    NYT::TRichYPath result(table);
    //result.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://lenta.ru"))));
    //result.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://webmaster.yandex.ru"))));
    return result;
}

template<class T>
void Serialize(const T &src, TString &dst) {
    TStringOutput so(dst);
    ::Save(&so, src);
}

template<class T>
void Deserialize(const TString &src, T &dst) {
    TStringInput si(src);
    ::Load(&si, dst);
}

// Map stage of the "all queries" source.
// For every input row whose key (host) is listed in WebmasterHosts it parses the
// value as a QueryMessage and accumulates it into the old-week or the new-week
// counter, depending on which week contains the timestamp configured for the
// row's input table (TablesConfig: table index -> timestamp).
// One output row is written per accepted input row.
struct TExtractAllQueriesMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(WebmasterHosts, TablesConfig, WeekConfig)

    TExtractAllQueriesMapper() = default;
    TExtractAllQueriesMapper(const THashSet<TString> &webmasterHosts, const THashMap<ui32, time_t> &tablesConfig, const TDigestWeekConfig &weekConfig)
        : WebmasterHosts(webmasterHosts)
        , TablesConfig(tablesConfig)
        , WeekConfig(weekConfig)
    {
    }

public:
    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &inRow = input->GetRow();
            const TString host = inRow[F_YAMR_KEY].AsString();
            if (!WebmasterHosts.contains(host)) {
                continue;
            }

            // Timestamp associated with the table this row came from.
            const time_t sourceTimestamp = TablesConfig.at(input->GetTableIndex());
            proto::queries2::QueryMessage message;
            Y_PROTOBUF_SUPPRESS_NODISCARD message.ParseFromString(inRow[F_YAMR_VALUE].AsString());

            TCounter oldWeekStats, newWeekStats;
            TMap<time_t, long> clicksByTimestamp;
            if (WeekConfig.OldWeek.In(sourceTimestamp)) {
                oldWeekStats.Add(message);
                clicksByTimestamp[sourceTimestamp] = oldWeekStats.Clicks;
            } else if (WeekConfig.NewWeek.In(sourceTimestamp)) {
                newWeekStats.Add(message);
                clicksByTimestamp[sourceTimestamp] = newWeekStats.Clicks;
            }

            // The per-timestamp clicks travel through the shuffle as a serialized string.
            TString packedClicks;
            Serialize(clicksByTimestamp, packedClicks);

            NYT::TNode oldNode;
            oldNode
                (F_POSITION, oldWeekStats.Shows > 0 ? oldWeekStats.GetPosition() : 0)
                (F_SHOWS, oldWeekStats.Shows)
                (F_CLICKS, oldWeekStats.Clicks);

            NYT::TNode newNode;
            newNode
                (F_POSITION, newWeekStats.Shows > 0 ? newWeekStats.GetPosition() : 0)
                (F_SHOWS, newWeekStats.Shows)
                (F_CLICKS, newWeekStats.Clicks);

            NYT::TNode outRow;
            outRow
                (F_HOST, host)
                (F_CLICKS_SPLIT, packedClicks)
                (F_OLD, oldNode)
                (F_NEW, newNode);
            output->AddRow(outRow);
        }
    }

public:
    THashSet<TString> WebmasterHosts;       // hosts to keep
    THashMap<ui32, time_t> TablesConfig;    // input table index -> table timestamp
    TDigestWeekConfig WeekConfig;           // the two weeks being compared
};

REGISTER_MAPPER(TExtractAllQueriesMapper)

// Combine stage of the "all queries" source.
// Folds all rows of one host (as produced by TExtractAllQueriesMapper) into a
// single row of the same layout: summed old/new counters plus the merged,
// re-serialized clicks-per-timestamp map.
struct TCalcAllQueriesCombiner : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(TReader *input, TWriter *output) override {
        const TString host = input->GetRow()[F_HOST].AsString();

        TMap<time_t, long> clicks;
        TCounter oldCounter, newCounter;
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();

            const NYT::TNode &oldPeriodNode = row[F_OLD];
            oldCounter.Add(oldPeriodNode[F_SHOWS].AsInt64(), oldPeriodNode[F_CLICKS].AsInt64(), oldPeriodNode[F_POSITION].AsDouble());

            const NYT::TNode &newPeriodNode = row[F_NEW];
            newCounter.Add(newPeriodNode[F_SHOWS].AsInt64(), newPeriodNode[F_CLICKS].AsInt64(), newPeriodNode[F_POSITION].AsDouble());

            // Scratch map is scoped to the row so that nothing from a previous
            // row can leak into this one regardless of how ::Load() fills it.
            TMap<time_t, long> rowClicks;
            Deserialize(row[F_CLICKS_SPLIT].AsString(), rowClicks);
            for (const auto &obj : rowClicks) {
                clicks[obj.first] += obj.second;
            }
        }

        TString clicksStream;
        Serialize(clicks, clicksStream);

        // The position is written with the same "no shows -> 0" guard that
        // TExtractAllQueriesMapper applies, because this row is fed back into
        // TCounter::Add() downstream and a host may have no shows in one of the weeks.
        // NOTE(review): assumes GetPosition() is not meaningful when Shows == 0 — confirm against TCounter.
        output->AddRow(NYT::TNode()
            (F_HOST, host)
            (F_CLICKS_SPLIT, clicksStream)
            (F_OLD, NYT::TNode()
                (F_POSITION, oldCounter.Shows > 0 ? oldCounter.GetPosition() : 0)
                (F_SHOWS, oldCounter.Shows)
                (F_CLICKS, oldCounter.Clicks)
            )
            (F_NEW, NYT::TNode()
                (F_POSITION, newCounter.Shows > 0 ? newCounter.GetPosition() : 0)
                (F_SHOWS, newCounter.Shows)
                (F_CLICKS, newCounter.Clicks)
            )
        );
    }
};

REGISTER_REDUCER(TCalcAllQueriesCombiner)

// Reduce stage of the "all queries" source.
// Sums the old/new counters of one host, merges the clicks-per-timestamp maps,
// and writes one row with Old/New/Diff/DiffPercent for clicks, position and shows
// plus the clicks series split into the old and the new week.
struct TCalcAllQueriesReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(WeekConfig)

    TCalcAllQueriesReducer() = default;
    TCalcAllQueriesReducer(const TDigestWeekConfig &weekConfig)
        : WeekConfig(weekConfig)
    {
    }

public:
    void Do(TReader *input, TWriter *output) override {
        const TString host = input->GetRow()[F_HOST].AsString();

        TCounter oldTotals, newTotals;
        TMap<time_t, long> clicksByTimestamp, rowClicks;
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();

            const NYT::TNode &oldPart = row[F_OLD];
            oldTotals.Add(oldPart[F_SHOWS].AsInt64(), oldPart[F_CLICKS].AsInt64(), oldPart[F_POSITION].AsDouble());

            const NYT::TNode &newPart = row[F_NEW];
            newTotals.Add(newPart[F_SHOWS].AsInt64(), newPart[F_CLICKS].AsInt64(), newPart[F_POSITION].AsDouble());

            Deserialize(row[F_CLICKS_SPLIT].AsString(), rowClicks);
            for (const auto &entry : rowClicks) {
                clicksByTimestamp[entry.first] += entry.second;
            }
        }

        // Distribute the merged clicks between the two weeks; timestamps that
        // belong to neither week are dropped.
        NYT::TNode oldSeries = NYT::TNode::CreateList();
        NYT::TNode newSeries = NYT::TNode::CreateList();
        for (const auto &entry : clicksByTimestamp) {
            NYT::TNode *series = nullptr;
            if (WeekConfig.OldWeek.In(entry.first)) {
                series = &oldSeries;
            } else if (WeekConfig.NewWeek.In(entry.first)) {
                series = &newSeries;
            }
            if (series != nullptr) {
                series->Add(NYT::TNode()
                    (F_PERIOD, entry.first)
                    (F_CLICKS, entry.second)
                );
            }
        }

        TSample<long> clicksSample(oldTotals.Clicks, newTotals.Clicks);
        TSample<long> showsSample(oldTotals.Shows, newTotals.Shows);
        TSample<double> positionSample(oldTotals.GetPosition(), newTotals.GetPosition());

        // Same four keys are written for every indicator.
        const auto sampleToNode = [](const auto &sample) {
            return NYT::TNode()
                (F_OLD, sample.OldValue)
                (F_NEW, sample.NewValue)
                (F_DIFF, sample.GetDiff())
                (F_DIFF_PERCENT, sample.GetDiffPercent());
        };

        NYT::TNode outRow;
        outRow
            (F_HOST, host)
            (F_CLICKS_SPLIT, NYT::TNode()
                (F_OLD, oldSeries)
                (F_NEW, newSeries)
            )
            (F_CLICKS, sampleToNode(clicksSample))
            (F_POSITION, sampleToNode(positionSample))
            (F_SHOWS, sampleToNode(showsSample));
        output->AddRow(outRow);
    }

public:
    TDigestWeekConfig WeekConfig;   // the two weeks being compared
};

REGISTER_REDUCER(TCalcAllQueriesReducer)

// Map stage of the "popular urls" source.
// Keeps rows whose key (host) is in WebmasterHosts and whose message URL splits
// into exactly that host; emits host, URL path and the clicks/shows of the message.
struct TExtractPopularUrlsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(WebmasterHosts)

    TExtractPopularUrlsMapper() = default;
    TExtractPopularUrlsMapper(const THashSet<TString> &webmasterHosts)
        : WebmasterHosts(webmasterHosts)
    {
    }

public:
    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &inRow = input->GetRow();
            const TString host = inRow[F_YAMR_KEY].AsString();
            if (!WebmasterHosts.contains(host)) {
                continue;
            }

            proto::queries2::QueryMessage message;
            Y_PROTOBUF_SUPPRESS_NODISCARD message.ParseFromString(inRow[F_YAMR_VALUE].AsString());

            TString hostPart, pathPart;
            SplitUrlToHostAndPath(message.url(), hostPart, pathPart);

            // Only URLs that belong to the row's own host are counted.
            if (hostPart == host) {
                TCounter stats;
                stats.Add(message);

                NYT::TNode outRow;
                outRow
                    (F_HOST, host)
                    (F_PATH, pathPart)
                    (F_CLICKS, static_cast<long>(stats.Clicks))
                    (F_SHOWS, static_cast<long>(stats.Shows));
                output->AddRow(outRow);
            }
        }
    }

public:
    THashSet<TString> WebmasterHosts;   // hosts to keep
};

REGISTER_MAPPER(TExtractPopularUrlsMapper)

// Sums clicks and shows over all rows of one reduce group and writes a single
// row carrying the host/path of the group's first row, the totals, and the
// negated totals (ClicksInv / ShowsInv).
// NOTE(review): the negated columns are presumably there to sort in descending order — confirm at the call site.
struct TExtractPopularUrlsCombiner : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(TReader *input, TWriter *output) override {
        const TString host = input->GetRow()[F_HOST].AsString();
        const TString path = input->GetRow()[F_PATH].AsString();

        long totalClicks = 0;
        long totalShows = 0;
        while (input->IsValid()) {
            const NYT::TNode &current = input->GetRow();
            totalClicks += current[F_CLICKS].AsInt64();
            totalShows += current[F_SHOWS].AsInt64();
            input->Next();
        }

        NYT::TNode outRow;
        outRow
            (F_HOST, host)
            (F_PATH, path)
            (F_CLICKS, totalClicks)
            (F_CLICKS_INV, -totalClicks)
            (F_SHOWS, totalShows)
            (F_SHOWS_INV, -totalShows);
        output->AddRow(outRow);
    }
};

REGISTER_REDUCER(TExtractPopularUrlsCombiner)

// Passes through at most MAX_URLS rows of a reduce group, in input order,
// skipping rows that report more clicks than shows (treated as broken data).
struct TExtractPopularUrlsReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(TReader *input, TWriter *output) override {
        const size_t MAX_URLS = 100;

        size_t emitted = 0;
        while (input->IsValid() && emitted < MAX_URLS) {
            const NYT::TNode &current = input->GetRow();
            const bool isBroken = current[F_CLICKS].AsInt64() > current[F_SHOWS].AsInt64();
            if (!isBroken) {
                output->AddRow(current);
                ++emitted;
            }
            input->Next();
        }
    }
};

REGISTER_REDUCER(TExtractPopularUrlsReducer)

// Aggregates all favorite-query messages of one host.
// Every per-region report is added to the old-week or the new-week counter
// according to its own timestamp; reports outside both weeks are ignored.
// Writes one row with Old/New/Diff/DiffPercent for clicks, position and shows.
struct TExtractAllFavoritesQueriesReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(WeekConfig)

    TExtractAllFavoritesQueriesReducer() = default;
    TExtractAllFavoritesQueriesReducer(const TDigestWeekConfig &weekConfig)
        : WeekConfig(weekConfig)
    {
    }

public:
    void Do(TReader *input, TWriter *output) override {
        const TString host = input->GetRow()[F_YAMR_KEY].AsString();

        TCounter oldTotals, newTotals;
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();

            proto::queries2::QueryMessage message;
            Y_PROTOBUF_SUPPRESS_NODISCARD message.ParseFromString(row[F_YAMR_VALUE].AsString());

            for (const auto &report : message.reports_by_region()) {
                if (WeekConfig.OldWeek.In(report.timestamp())) {
                    oldTotals.Add(report);
                } else if (WeekConfig.NewWeek.In(report.timestamp())) {
                    newTotals.Add(report);
                }
            }
        }

        TSample<long> clicksSample(oldTotals.Clicks, newTotals.Clicks);
        TSample<long> showsSample(oldTotals.Shows, newTotals.Shows);
        TSample<double> positionSample(oldTotals.GetPosition(), newTotals.GetPosition());

        // Same four keys are written for every indicator.
        const auto sampleToNode = [](const auto &sample) {
            return NYT::TNode()
                (F_OLD, sample.OldValue)
                (F_NEW, sample.NewValue)
                (F_DIFF, sample.GetDiff())
                (F_DIFF_PERCENT, sample.GetDiffPercent());
        };

        NYT::TNode outRow;
        outRow
            (F_HOST, host)
            (F_CLICKS, sampleToNode(clicksSample))
            (F_POSITION, sampleToNode(positionSample))
            (F_SHOWS, sampleToNode(showsSample));
        output->AddRow(outRow);
    }

public:
    TDigestWeekConfig WeekConfig;   // the two weeks being compared
};

REGISTER_REDUCER(TExtractAllFavoritesQueriesReducer)

// Renames the columns of group-settings rows: key -> Host, subkey -> GroupId,
// "group_name" -> GroupName. Values are copied as they are.
struct TExtractGroupsSettingsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(TReader *input, TWriter *output) override {
        while (input->IsValid()) {
            const NYT::TNode &source = input->GetRow();

            NYT::TNode renamed;
            renamed
                (F_HOST, source[F_YAMR_KEY])
                (F_GROUP_ID, source[F_YAMR_SUBKEY])
                (F_GROUP_NAME, source["group_name"]);
            output->AddRow(renamed);

            input->Next();
        }
    }
};

REGISTER_MAPPER(TExtractGroupsSettingsMapper)

// Computes new-minus-old differences for every object present in BOTH maps and
// stores them in three multimaps keyed by the difference (shows, clicks, position),
// with the object name as the value. Objects missing from newCounters are skipped.
// The output maps are appended to, not cleared.
static void CalcDiff(const THashMap<TString, TCounter> &oldCounters, const THashMap<TString, TCounter> &newCounters, TMultiMap<long, TString> &diffMapShows,
    TMultiMap<long, TString> &diffMapClicks, TMultiMap<double, TString> &diffMapPosition)
{
    for (const auto &obj : oldCounters) {
        const TString &query = obj.first;
        const TCounter &oldCounter = obj.second;

        // Single lookup instead of contains() followed by at().
        const auto newIt = newCounters.find(query);
        if (newIt == newCounters.end()) {
            continue;
        }

        const TCounter &newCounter = newIt->second;
        const long diffShows = newCounter.Shows - oldCounter.Shows;
        const long diffClicks = newCounter.Clicks - oldCounter.Clicks;
        const double diffPosition = newCounter.GetPosition() - oldCounter.GetPosition();

        diffMapShows.insert(std::make_pair(diffShows, query));
        diffMapClicks.insert(std::make_pair(diffClicks, query));
        diffMapPosition.insert(std::make_pair(diffPosition, query));
    }
}

// Picks up to MAX_SAMPLES objects with the largest positive differences and up
// to MAX_SAMPLES objects with the most negative differences from diffMap
// (a map ordered by difference, as filled by CalcDiff()).
// For each picked object the old and new values are obtained with valueGetter;
// objects whose old value equals 0 are not used as samples.
template <class TDiffMap, class FGetter, class TSamples>
static void GetSamples(const THashMap<TString, TCounter> &oldCounters, const THashMap<TString, TCounter> &newCounters, const TDiffMap &diffMap,
    FGetter valueGetter, TSamples &positiveSamples, TSamples &negativeSamples)
{
    const size_t MAX_SAMPLES = 5;

    // Walks [it, end) and appends to dst every entry whose difference passes
    // signFilter, until dst holds MAX_SAMPLES items.
    const auto collect = [&](auto it, const auto end, const auto signFilter, TSamples &dst) {
        for (; it != end && dst.size() < MAX_SAMPLES; ++it) {
            if (!signFilter(it->first)) {
                continue;
            }

            const auto &object = it->second;
            typename TSamples::value_type sample;
            sample.OldValue = valueGetter(oldCounters.at(object));
            // naou@: samples whose old value is "0" are better left out
            if (sample.OldValue != 0) {
                sample.NewValue = valueGetter(newCounters.at(object));
                sample.Object = object;
                dst.push_back(sample);
            }
        }
    };

    // Largest differences first.
    collect(diffMap.crbegin(), diffMap.crend(), [](const auto diff) { return diff > 0; }, positiveSamples);
    // Smallest (most negative) differences first.
    collect(diffMap.cbegin(), diffMap.cend(), [](const auto diff) { return diff < 0; }, negativeSamples);
}

// Stores every sample under row[samplesFieldName][<object>] as a node with the
// Old, New, Diff and DiffPercent keys. Nothing is touched when samples is empty.
template<class TSamples>
static void OutputSamples(NYT::TNode &row, const TSamples &samples, const TString &samplesFieldName) {
    for (auto it = samples.begin(); it != samples.end(); ++it) {
        NYT::TNode &entry = row[samplesFieldName][it->Object];
        entry[F_OLD] = it->OldValue;
        entry[F_NEW] = it->NewValue;
        entry[F_DIFF] = it->GetDiff();
        entry[F_DIFF_PERCENT] = it->GetDiffPercent();
    }
}

// Builds per-host samples of the queries that changed the most between the two weeks.
// All rows of a host are parsed as QueryMessage; every per-region report is added to
// the counter of its query in the old-week or new-week map according to the report's
// timestamp. The differences are then computed with CalcDiff(), up to a few samples per
// indicator and direction are picked with GetSamples(), and a single row is written.
// The commented-out fragments are a disabled per-URL variant of the same calculation.
struct TCalcTopQueriesReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(WeekConfig)

    TCalcTopQueriesReducer() = default;
    TCalcTopQueriesReducer(const TDigestWeekConfig &weekConfig)
        : WeekConfig(weekConfig)
    {
    }

public:
    void Do(TReader *input, TWriter *output) override {
        const TString host = input->GetRow()[F_YAMR_KEY].AsString();
        // query text -> accumulated counter, separately for each week
        THashMap<TString, TCounter> oldQueries, newQueries;
        //THashMap<TString, TCounter> oldUrls, newUrls;

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();

            proto::queries2::QueryMessage msg;
            Y_PROTOBUF_SUPPRESS_NODISCARD msg.ParseFromString(row[F_YAMR_VALUE].AsString());

            for (int i = 0; i < msg.reports_by_region_size(); i++) {
                const auto &region = msg.reports_by_region(i);

                // Reports whose timestamp is in neither week are ignored.
                if (WeekConfig.OldWeek.In(region.timestamp())) {
                    oldQueries[msg.query()].Add(region);
                    //oldUrls[msg.url()].Add(region);
                } else if (WeekConfig.NewWeek.In(region.timestamp())) {
                    newQueries[msg.query()].Add(region);
                    //newUrls[msg.url()].Add(region);
                }
            }
        }

        TMultiMap<long, TString> diffQueriesShows;
        TMultiMap<long, TString> diffQueriesClicks;
        TMultiMap<double, TString> diffQueriesPosition;
        TDeque<TSample<long>> queriesShowsPosSamples, queriesShowsNegSamples;
        TDeque<TSample<long>> queriesClicksPosSamples, queriesClicksNegSamples;
        TDeque<TSample<double>> queriesPositionPosSamples, queriesPositionNegSamples;
        CalcDiff(oldQueries, newQueries, diffQueriesShows, diffQueriesClicks, diffQueriesPosition);
        GetSamples(oldQueries, newQueries, diffQueriesShows, TCounter::ShowsGetter, queriesShowsPosSamples, queriesShowsNegSamples);
        GetSamples(oldQueries, newQueries, diffQueriesClicks, TCounter::ClicksGetter, queriesClicksPosSamples, queriesClicksNegSamples);
        // For position the output containers are passed in the opposite order, so a
        // numerically positive difference ends up in the "negative" samples and vice versa.
        // NOTE(review): presumably because a smaller position value is the better one — confirm.
        GetSamples(oldQueries, newQueries, diffQueriesPosition, TCounter::PositionGetter, queriesPositionNegSamples, queriesPositionPosSamples); /* negative and positive samples swapped */
        /*
        TMultiMap<long, TString> diffUrlsShows;
        TMultiMap<long, TString> diffUrlsClicks;
        TMultiMap<double, TString> diffUrlsPosition;
        TDeque<TSample<long>> urlsShowsPosSamples, urlsShowsNegSamples;
        TDeque<TSample<long>> urlsClicksPosSamples, urlsClicksNegSamples;
        TDeque<TSample<double>> urlsPositionPosSamples, urlsPositionNegSamples;
        CalcDiff(oldUrls, newUrls, diffUrlsShows, diffUrlsClicks, diffUrlsPosition);
        GetSamples(oldUrls, newUrls, diffUrlsShows, ShowsGetter, urlsShowsPosSamples, urlsShowsNegSamples);
        GetSamples(oldUrls, newUrls, diffUrlsClicks, ClicksGetter, urlsClicksPosSamples, urlsClicksNegSamples);
        GetSamples(oldUrls, newUrls, diffUrlsPosition, PositionGetter, urlsPositionPosSamples, urlsPositionNegSamples);
        */

        // One output row per host; a samples column is only created when it has samples.
        NYT::TNode row;
        row(F_HOST, host);
        OutputSamples(row, queriesShowsPosSamples,      F_POS_QUERIES_SHOWS/*,       "Shows"*/);
        OutputSamples(row, queriesClicksPosSamples,     F_POS_QUERIES_CLICKS/*,      "Clicks"*/);
        OutputSamples(row, queriesPositionPosSamples,   F_POS_QUERIES_POSITION/*,    "Position"*/);
        OutputSamples(row, queriesShowsNegSamples,      F_NEG_QUERIES_SHOWS/*,       "Shows"*/);
        OutputSamples(row, queriesClicksNegSamples,     F_NEG_QUERIES_CLICKS/*,      "Clicks"*/);
        OutputSamples(row, queriesPositionNegSamples,   F_NEG_QUERIES_POSITION/*,    "Position"*/);
        /*
        OutputSamples(row, urlsShowsPosSamples,         "UrlsShowsPosSamples",      "Shows");
        OutputSamples(row, urlsClicksPosSamples,        "UrlsClicksPosSamples",     "Clicks");
        OutputSamples(row, urlsPositionPosSamples,      "UrlsPositionPosSamples",   "Position");
        OutputSamples(row, urlsShowsNegSamples,         "UrlsShowsNegSamples",      "Shows");
        OutputSamples(row, urlsClicksNegSamples,        "UrlsClicksNegSamples",     "Clicks");
        OutputSamples(row, urlsPositionNegSamples,      "UrlsPositionNegSamples",   "Position");
        */
        output->AddRow(row);
    }

public:
    // The two weeks being compared.
    TDigestWeekConfig WeekConfig;
};

REGISTER_REDUCER(TCalcTopQueriesReducer)

// Builds per-host samples of the query groups that changed the most between the two weeks.
// The week of a row is taken from its input table index (TablesConfig), the group id
// from the row's subkey. All per-region reports of the row's message are added to the
// counter of that group; the samples are then produced with CalcDiff()/GetSamples().
struct TCalcGroupQueriesReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(TablesConfig)

    TCalcGroupQueriesReducer() = default;
    TCalcGroupQueriesReducer(const THashMap<ui32, EWeekPeriod> &tablesConfig)
        : TablesConfig(tablesConfig)
    {
    }

public:
    void Do(TReader *input, TWriter *output) override {
        const TString host = input->GetRow()[F_YAMR_KEY].AsString();
        // group id -> accumulated counter, separately for each week
        THashMap<TString, TCounter> oldWeekGroups, newWeekGroups;

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            const TString groupId = row[F_YAMR_SUBKEY].AsString();
            const EWeekPeriod weekPeriod = TablesConfig.at(input->GetTableIndex());

            proto::queries2::QueryMessage message;
            Y_PROTOBUF_SUPPRESS_NODISCARD message.ParseFromString(row[F_YAMR_VALUE].AsString());

            // The week is a property of the whole row, so the target map is chosen once.
            THashMap<TString, TCounter> *target = nullptr;
            if (weekPeriod == E_WEEK_PERIOD_OLD) {
                target = &oldWeekGroups;
            } else if (weekPeriod == E_WEEK_PERIOD_NEW) {
                target = &newWeekGroups;
            }
            if (target == nullptr) {
                continue;
            }

            for (const auto &report : message.reports_by_region()) {
                (*target)[groupId].Add(report);
            }
        }

        TMultiMap<long, TString> showsDiff;
        TMultiMap<long, TString> clicksDiff;
        TMultiMap<double, TString> positionDiff;
        CalcDiff(oldWeekGroups, newWeekGroups, showsDiff, clicksDiff, positionDiff);

        TDeque<TSample<long>> showsPos, showsNeg;
        GetSamples(oldWeekGroups, newWeekGroups, showsDiff, TCounter::ShowsGetter, showsPos, showsNeg);

        TDeque<TSample<long>> clicksPos, clicksNeg;
        GetSamples(oldWeekGroups, newWeekGroups, clicksDiff, TCounter::ClicksGetter, clicksPos, clicksNeg);

        // negative and positive samples are swapped for position on purpose
        TDeque<TSample<double>> positionPos, positionNeg;
        GetSamples(oldWeekGroups, newWeekGroups, positionDiff, TCounter::PositionGetter, positionNeg, positionPos);

        NYT::TNode outRow;
        outRow(F_HOST, host);
        OutputSamples(outRow, showsPos, F_POS_GROUPS_SHOWS);
        OutputSamples(outRow, clicksPos, F_POS_GROUPS_CLICKS);
        OutputSamples(outRow, positionPos, F_POS_GROUPS_POSITION);
        OutputSamples(outRow, showsNeg, F_NEG_GROUPS_SHOWS);
        OutputSamples(outRow, clicksNeg, F_NEG_GROUPS_CLICKS);
        OutputSamples(outRow, positionNeg, F_NEG_GROUPS_POSITION);
        output->AddRow(outRow);
    }

public:
    THashMap<ui32, EWeekPeriod> TablesConfig;   // input table index -> week of that table
};

REGISTER_REDUCER(TCalcGroupQueriesReducer)

// Takes the table name part of the given path (NYTUtils::GetTableName), parses it
// as a "%Y-%m-%d" date and returns TDate::GetStart() of that date.
time_t GetPeriodFromTopsAttr(const TString &tableName) {
    const char *dateFormat = "%Y-%m-%d";
    TDate parsed(NYTUtils::GetTableName(tableName), dateFormat);
    return parsed.GetStart();
}

// Lists the tables under sourcePrefix and adds to sourceTables the names of those
// whose period falls into weekConfig.
// A table name must contain two numeric parts joined by '_' ("<begin>_<end>") and
// both parts must be equal; otherwise an exception is thrown.
//
// client        - YT client used for listing
// weekConfig    - the week the tables are selected for
// sourcePrefix  - directory with the source tables
// sourceTables  - receives the names of the matching tables (existing content is kept)
void LoadQueriesTables(NYT::IClientBasePtr client, const TWeekConfig &weekConfig, const TString &sourcePrefix, TSet<TString> &sourceTables) {
    TDeque<NYTUtils::TTableInfo> tables;
    NYTUtils::GetTableList(client, sourcePrefix, tables);

    TRegularExpression regex("(\\d+)_(\\d+)");
    for (const NYTUtils::TTableInfo &table : tables) {
        TVector<TString> period;
        if (regex.GetMatches(table.Name, period) != 2) {
            // The trailing space keeps the table name from being glued to the message text.
            ythrow yexception() << "unable to parse source table name " << table.Name;
        }
        if (period[0] != period[1]) {
            ythrow yexception() << "unknown period in source table name " << table.Name;
        }
        // Begin and end are equal at this point, so the begin date alone identifies the period.
        time_t periodBegin = str2date(period[0]);
        //time_t periodEnd = str2date(period[1]);
        if (weekConfig.In(periodBegin)) {
            sourceTables.insert(table.Name);
            LOG_INFO("source all queries, week %s, found table %s", weekConfig.WeekName().data(), table.Name.data());
        }
    }
}

// Extracts the begin date from a source table name of the form "<begin>_<end>"
// and converts it with str2date().
// Throws yexception when the name does not contain the two numeric parts.
time_t GetTimestampFromSourceTable(const TString &tableName) {
    TRegularExpression regex("(\\d+)_(\\d+)");
    TVector<TString> period;
    if (regex.GetMatches(tableName, period) != 2) {
        // Report the offending name, as LoadQueriesTables() does for the same failure.
        ythrow yexception() << "unable to parse source table name " << tableName;
    }
    return str2date(period[0]);
}

// Joins the configured TABLE_DIGEST_SOURCE_QUERIES_FAVORITES_ALL path with
// the range name of the given pair of weeks.
TString GetAllFavoritesQueriesTableName(const TDigestWeekConfig &weekConfig) {
    return NYTUtils::JoinPath(TConfig::CInstance().TABLE_DIGEST_SOURCE_QUERIES_FAVORITES_ALL, weekConfig.RangeName());
}

// Joins the configured TABLE_DIGEST_SOURCE_QUERIES_ALL path with
// the range name of the given pair of weeks.
TString GetAllQueriesTableName(const TDigestWeekConfig &weekConfig) {
    return NYTUtils::JoinPath(TConfig::CInstance().TABLE_DIGEST_SOURCE_QUERIES_ALL, weekConfig.RangeName());
}

// Joins the configured TABLE_DIGEST_SOURCE_QUERIES_GROUPS path with
// the range name of the given pair of weeks.
TString GetGroupsQueriesTableName(const TDigestWeekConfig &weekConfig) {
    return NYTUtils::JoinPath(TConfig::CInstance().TABLE_DIGEST_SOURCE_QUERIES_GROUPS, weekConfig.RangeName());
}

// Joins the configured TABLE_DIGEST_SOURCE_QUERIES_POPULAR_URLS path with the
// name of the NEW week only (unlike the other getters, which use the range name).
TString GetPopularUrlsTableName(const TDigestWeekConfig &weekConfig) {
    return NYTUtils::JoinPath(TConfig::CInstance().TABLE_DIGEST_SOURCE_QUERIES_POPULAR_URLS, weekConfig.NewWeek.WeekName());
}

// Joins the configured TABLE_DIGEST_SOURCE_QUERIES_FAVORITES_SPLIT path with
// the range name of the given pair of weeks.
TString GetSplitFavoritesQueriesTableName(const TDigestWeekConfig &weekConfig) {
    return NYTUtils::JoinPath(TConfig::CInstance().TABLE_DIGEST_SOURCE_QUERIES_FAVORITES_SPLIT, weekConfig.RangeName());
}

// Joins the configured TABLE_DIGEST_SOURCE_QUERIES_TOPS path with
// the range name of the given pair of weeks.
TString GetTopsQueriesTableName(const TDigestWeekConfig &weekConfig) {
    return NYTUtils::JoinPath(TConfig::CInstance().TABLE_DIGEST_SOURCE_QUERIES_TOPS, weekConfig.RangeName());
}

// Builds the "all queries" digest table for the given pair of weeks.
// Does nothing when the result table already exists. Otherwise it collects the
// source tables of the old and of the new week (exactly 7 are required for each,
// an exception is thrown otherwise), runs the map-reduce over them inside a
// transaction, sorts the result by host and commits.
void PrepareAllQueriesSource(NYT::IClientBasePtr clientQueries, const THashSet<TString> &webmasterHosts, const TDigestWeekConfig &weekConfig) {
    const auto &config = TConfig::CInstance();

    const TString resultTable = GetAllQueriesTableName(weekConfig);
    if (clientQueries->Exists(resultTable)) {
        LOG_INFO("source all queries, table is already processed");
        return;
    }

    const size_t TABLES_PER_WEEK = 7;

    // Loads the source tables of one week and checks that the set is complete.
    const auto loadWeekTables = [&](const auto &week, TSet<TString> &tables) {
        LoadQueriesTables(clientQueries, week, config.TABLE_SOURCE_QUERIES_CONVERTED_PREFIX, tables);
        if (tables.size() != TABLES_PER_WEEK) {
            ythrow yexception() << "unknown week source tables set " << tables.size() << " [" << JoinSeq(", ", tables) << "]";
        }
    };

    TSet<TString> oldWeekTables;
    loadWeekTables(weekConfig.OldWeek, oldWeekTables);

    TSet<TString> newWeekTables;
    loadWeekTables(weekConfig.NewWeek, newWeekTables);

    NYT::ITransactionPtr tx = clientQueries->StartTransaction();

    // input table index -> timestamp parsed from the table name; handed to the mapper
    THashMap<ui32, time_t> tablesConfig;

    TOpRunner runner(tx);

    ui32 tableIndex = 0;
    // Registers the tables as inputs, numbering them in registration order.
    const auto addInputs = [&](const TSet<TString> &tables) {
        for (const TString &table : tables) {
            runner.InputNode(DebugPath(table));
            tablesConfig[tableIndex++] = GetTimestampFromSourceTable(table);
        }
    };
    addInputs(oldWeekTables);
    addInputs(newWeekTables);

    runner
        .OutputNode(resultTable)
        .MemoryLimit(MEMORY_LIMIT_4GB)
        .ReduceBy(F_HOST)
        .MapReduce(new TExtractAllQueriesMapper(webmasterHosts, tablesConfig, weekConfig), new TCalcAllQueriesCombiner, new TCalcAllQueriesReducer(weekConfig))
        .SortBy(F_HOST)
        .Sort(resultTable)
    ;

    tx->Commit();
}

// Produces the two per-week favorites tables ("split" and "all") from the
// favorites source table.
// - Throws yexception if the source table does not exist.
// - Returns early when both output tables already exist.
// - Throws yexception if the period recorded in the source table attributes
//   does not cover the weeks in weekConfig.
// - Both reduce + sort passes run in one transaction started on clientQueries.
void PrepareFavoritesQueriesSource(NYT::IClientBasePtr clientQueries, const TDigestWeekConfig &weekConfig) {
    const auto &config = TConfig::CInstance();
    const auto &favoritesSource = config.TABLE_SOURCE_QUERIES_FAVORITES;

    if (!clientQueries->Exists(favoritesSource)) {
        ythrow yexception() << "there is no table " << favoritesSource;
    }

    // Reads a string attribute of the favorites source table.
    const auto readSourceAttr = [&](const char *attrName) {
        return NYTUtils::GetAttr(clientQueries, favoritesSource, attrName).AsString();
    };

    // Both attributes are fetched before either of them is parsed.
    const TString leastRecentAttr = readSourceAttr(ATTR_LEAST_RECENT_SOURCE_NAME);
    const TString mostRecentAttr = readSourceAttr(ATTR_MOST_RECENT_SOURCE_NAME);

    const time_t sourceBegin = GetPeriodFromTopsAttr(leastRecentAttr);
    const time_t sourceEnd = GetPeriodFromTopsAttr(mostRecentAttr);

    const TString splitTable = GetSplitFavoritesQueriesTableName(weekConfig);
    const TString allTable = GetAllFavoritesQueriesTableName(weekConfig);
    if (clientQueries->Exists(allTable) && clientQueries->Exists(splitTable)) {
        LOG_INFO("source favorites queries, tables are already processed");
        return;
    }

    // The requested weeks have to lie within [sourceBegin, sourceEnd].
    if (weekConfig.OldWeek.WeekStart < sourceBegin || TDate(weekConfig.NewWeek.WeekEnd).GetStart() > sourceEnd) {
        ythrow yexception() << "source favorites queries period is conflicting with week config:"
            << " weeks: " << NUtils::Date2StrTZ(weekConfig.OldWeek.WeekStart) << ".." << NUtils::Date2StrTZ(weekConfig.NewWeek.WeekEnd)
            << " tops: " << NUtils::Date2StrTZ(sourceBegin) << ".." << NUtils::Date2StrTZ(sourceEnd);
    }

    const auto sourceBeginStr = NUtils::Date2StrTZ(sourceBegin);
    const auto sourceEndStr = NUtils::Date2StrTZ(sourceEnd);
    LOG_INFO("source favorites queries, processing %s [range in favorites source %s..%s]", splitTable.data(), sourceBeginStr.data(), sourceEndStr.data());

    NYT::ITransactionPtr tx = clientQueries->StartTransaction();

    TOpRunner(tx)
        // Pass 1: favorites source -> split table.
        .InputNode(DebugPath(favoritesSource))
        .OutputNode(NYT::TRichYPath(splitTable).SortedBy(F_HOST))
        .MemoryLimit(MEMORY_LIMIT_2GB)
        .ReduceBy(F_YAMR_KEY)
        .Reduce(new TCalcTopQueriesReducer(weekConfig))
        .SortBy(F_HOST)
        .Sort(splitTable)

        // Pass 2: the same source -> "all favorites" table.
        .InputNode(DebugPath(favoritesSource))
        .OutputNode(NYT::TRichYPath(allTable).SortedBy(F_HOST))
        .MemoryLimit(MEMORY_LIMIT_2GB)
        .ReduceBy(F_YAMR_KEY)
        .Reduce(new TExtractAllFavoritesQueriesReducer(weekConfig))
        .SortBy(F_HOST)
        .Sort(allTable)
    ;

    tx->Commit();
}

// Maps the user query-groups settings table into its digest copy and sorts
// the result by host. All operations go through the passed client, which the
// caller in this file supplies as a transaction.
void PrepareGroupsSettingsSource(NYT::IClientBasePtr clientQueriesTx) {
    const auto &config = TConfig::CInstance();
    const auto &settingsTable = config.TABLE_SOURCE_USER_SETTINGS_QUERIES_GROUPS;
    const auto &digestTable = config.TABLE_DIGEST_SOURCE_USER_SETTINGS_QUERIES_GROUPS;

    TOpRunner runner(clientQueriesTx);
    runner
        .InputNode(settingsTable)
        .OutputNode(digestTable)
        .Map(new TExtractGroupsSettingsMapper)
        .SortBy(F_HOST)
        .Sort(digestTable)
    ;
}

// Builds the groups-queries source table for the week pair in weekConfig.
// - Returns immediately if the output table already exists.
// - Throws yexception unless exactly seven group source tables are found for
//   the old week and for the new week.
// - Refreshes the groups settings table, then reduces and sorts the inputs,
//   all inside one transaction started on clientQueries.
void PrepareGroupsQueriesSource(NYT::IClientBasePtr clientQueries, const TDigestWeekConfig &weekConfig) {
    const auto &config = TConfig::CInstance();
    const TString outputTable = GetGroupsQueriesTableName(weekConfig);
    if (clientQueries->Exists(outputTable)) {
        LOG_INFO("source groups queries, table is already processed");
        return;
    }

    // Loads the groups tables of a single week and requires a set of exactly seven.
    const auto loadWeekTables = [&](const auto &week) {
        TSet<TString> tables;
        LoadQueriesTables(clientQueries, week, config.TABLE_SOURCE_QUERIES_GROUPS_PREFIX, tables);
        if (tables.size() != 7) {
            ythrow yexception() << "unknown week source tables set " << tables.size() << " [" << JoinSeq(", ", tables) << "]";
        }
        return tables;
    };

    // The old week is validated first, then the new one.
    const TSet<TString> oldWeekTables = loadWeekTables(weekConfig.OldWeek);
    const TSet<TString> newWeekTables = loadWeekTables(weekConfig.NewWeek);

    NYT::ITransactionPtr tx = clientQueries->StartTransaction();

    // The settings table is rebuilt within the same transaction.
    PrepareGroupsSettingsSource(tx);

    // Maps the input table index (in registration order) to the week it belongs to.
    THashMap<ui32, EWeekPeriod> periodByTableIndex;

    TOpRunner runner(tx);

    ui32 nextTableIndex = 0;
    const auto registerInputs = [&](const TSet<TString> &tables, EWeekPeriod period) {
        for (const TString &table : tables) {
            runner.InputNode(DebugPath(table));
            periodByTableIndex[nextTableIndex++] = period;
        }
    };

    // Old-week tables get the lower indices, new-week tables follow.
    registerInputs(oldWeekTables, E_WEEK_PERIOD_OLD);
    registerInputs(newWeekTables, E_WEEK_PERIOD_NEW);

    runner
        .OutputNode(NYT::TRichYPath(outputTable).SortedBy(F_HOST))
        .ReduceBy(F_YAMR_KEY)
        .MemoryLimit(MEMORY_LIMIT_2GB)
        .Reduce(new TCalcGroupQueriesReducer(periodByTableIndex))
        .SortBy(F_HOST)
        .Sort(outputTable)
    ;

    tx->Commit();
}

// Builds the popular-urls source table from the new week's converted-queries tables.
// - Returns immediately if the output table already exists.
// - Throws yexception unless exactly seven source tables are found for the new week.
// - Runs MapReduce, Sort and a final Reduce inside one transaction started on
//   clientQueries and commits it at the end.
void PreparePopularUrlsSource(NYT::IClientBasePtr clientQueries, const THashSet<TString> &webmasterHosts, const TDigestWeekConfig &weekConfig) {
    const auto &config = TConfig::CInstance();
    const TString outputTable = GetPopularUrlsTableName(weekConfig);

    const bool alreadyProcessed = clientQueries->Exists(outputTable);
    if (alreadyProcessed) {
        LOG_INFO("source popular urls, table is already processed");
        return;
    }

    // Only the new week is used here.
    TSet<TString> newWeekTables;
    LoadQueriesTables(clientQueries, weekConfig.NewWeek, config.TABLE_SOURCE_QUERIES_CONVERTED_PREFIX, newWeekTables);
    if (newWeekTables.size() != 7) {
        ythrow yexception() << "unknown week source tables set " << newWeekTables.size() << " [" << JoinSeq(", ", newWeekTables) << "]";
    }

    NYT::ITransactionPtr tx = clientQueries->StartTransaction();

    TOpRunner runner(tx);
    for (const TString &inputTable : newWeekTables) {
        runner.InputNode(DebugPath(inputTable));
    }

    runner
        // Stage 1: aggregate by (host, path); the combiner class is also passed as the reducer.
        .OutputNode(NYT::TRichYPath(outputTable))
        .ReduceBy(F_HOST, F_PATH)
        .MemoryLimit(MEMORY_LIMIT_4GB)
        .Spec("data_size_per_sort_job", 1024 * 1024 * 1024) // combiner partition threshold
        .MapReduce(new TExtractPopularUrlsMapper(webmasterHosts), new TExtractPopularUrlsCombiner, new TExtractPopularUrlsCombiner)
        // Stage 2: order rows by host, then by the inverted clicks and shows columns.
        .SortBy(F_HOST, F_CLICKS_INV, F_SHOWS_INV)
        .Sort(outputTable)
        // Stage 3: reduce per host, reading from and writing to the same table.
        .InputNode(outputTable)
        .OutputNode(NYT::TRichYPath(outputTable).SortedBy(NYT::TSortColumns(F_HOST, F_CLICKS_INV, F_SHOWS_INV)))
        .ReduceBy(F_HOST)
        .Reduce(new TExtractPopularUrlsReducer)
    ;

    tx->Commit();
}

// Produces the per-week top-queries table from the tops source table.
// - Throws yexception if the source table does not exist.
// - Returns early when the output table already exists.
// - Throws yexception if the period recorded in the source table attributes
//   does not cover the weeks in weekConfig.
// - The reduce + sort runs in one transaction started on clientQueries.
void PrepareTopsQueriesSource(NYT::IClientBasePtr clientQueries, const TDigestWeekConfig &weekConfig) {
    const auto &config = TConfig::CInstance();
    const auto &topsSource = config.TABLE_SOURCE_QUERIES_TOPS;

    if (!clientQueries->Exists(topsSource)) {
        ythrow yexception() << "there is no table " << topsSource;
    }

    // Reads a string attribute of the tops source table.
    const auto readSourceAttr = [&](const char *attrName) {
        return NYTUtils::GetAttr(clientQueries, topsSource, attrName).AsString();
    };

    // Both attributes are fetched before either of them is parsed.
    const TString leastRecentAttr = readSourceAttr(ATTR_LEAST_RECENT_SOURCE_NAME);
    const TString mostRecentAttr = readSourceAttr(ATTR_MOST_RECENT_SOURCE_NAME);

    const time_t sourceBegin = GetPeriodFromTopsAttr(leastRecentAttr);
    const time_t sourceEnd = GetPeriodFromTopsAttr(mostRecentAttr);

    const TString outputTable = GetTopsQueriesTableName(weekConfig);
    if (clientQueries->Exists(outputTable)) {
        LOG_INFO("source top queries, table is already processed");
        return;
    }

    // The requested weeks have to lie within [sourceBegin, sourceEnd].
    if (weekConfig.OldWeek.WeekStart < sourceBegin || TDate(weekConfig.NewWeek.WeekEnd).GetStart() > sourceEnd) {
        ythrow yexception() << "source top queries period is conflicting with week config:"
            << " weeks: " << NUtils::Date2StrTZ(weekConfig.OldWeek.WeekStart) << ".." << NUtils::Date2StrTZ(weekConfig.NewWeek.WeekEnd)
            << " tops: " << NUtils::Date2StrTZ(sourceBegin) << ".." << NUtils::Date2StrTZ(sourceEnd);
    }

    const auto sourceBeginStr = NUtils::Date2StrTZ(sourceBegin);
    const auto sourceEndStr = NUtils::Date2StrTZ(sourceEnd);
    LOG_INFO("source top queries, processing %s [range in tops source %s..%s]", outputTable.data(), sourceBeginStr.data(), sourceEndStr.data());

    NYT::ITransactionPtr tx = clientQueries->StartTransaction();

    TOpRunner(tx)
        .InputNode(DebugPath(topsSource))
        .OutputNode(NYT::TRichYPath(outputTable).SortedBy(F_HOST))
        .MemoryLimit(MEMORY_LIMIT_2GB)
        .ReduceBy(F_YAMR_KEY)
        .Reduce(new TCalcTopQueriesReducer(weekConfig))
        .SortBy(F_HOST)
        .Sort(outputTable)
    ;

    tx->Commit();
}

} //namespace NWebmaster
