#include <util/string/printf.h>
#include <util/string/vector.h>
#include <util/random/random.h>
#include <util/thread/pool.h>

#include <library/cpp/containers/comptrie/comptrie_trie.h>
#include <library/cpp/containers/comptrie/prefix_iterator.h>

#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/wmcutil/compress.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/serialize.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/misc.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include "host2vec.h"
#include "query_utils.h"
#include "rivals.h"
#include "task_trends1.h"

namespace NWebmaster {
namespace NTrends1 {

// File-local names of the YT attribute and table columns used by the jobs below.
namespace {
// Attribute read from / written to the host2vec table to remember when it was last updated.
const char *ATTR_UPLOAD_TIME    = "upload_time";

// Columns of the source query tables: the key holds the host, the value holds
// a serialized proto::queries2::QueryMessage (see TExtractQueriesMapper).
const char *F_YAMR_KEY          = "key";
//const char *F_YAMR_SUBKEY     = "subkey";
const char *F_YAMR_VALUE        = "value";
// Per-row and per-host click/show totals produced by the extraction stage.
const char *F_TOTAL_CLICKS      = "TotalClicks";
const char *F_TOTAL_SHOWS       = "TotalShows";
// Group hash attached to every query row by TJoinGroupsReducer.
const char *F_GROUP_ID          = "GroupId";
// Device flags copied from the per-region report.
const char *F_IS_MOBILE         = "IsMobile";
const char *F_IS_PAD            = "IsPad";
// Aggregate columns of the statistic nodes written by TProcessGroupsReducer.
const char *F_CTR               = "CTR";
const char *F_SUM_CLICKS        = "SumClicks";
const char *F_SUM_SHOWS         = "SumShows";
// Number of rival hosts that passed the click-bounds filter.
const char *F_SELECTED_RIVALS   = "SelectedRivals";
}

// Mapper over the converted query tables.
//
// Input rows carry the host in F_YAMR_KEY and a serialized
// proto::queries2::QueryMessage in F_YAMR_VALUE. A row is dropped when:
//   * the host2vec domain of the host is in the "monster" list;
//   * no prefix of the reversed host found in Trie yields an owner the host
//     is a subdomain of;
//   * the corrected query matches one of the junk (search operator) patterns;
//   * the URL cannot be split or is one of the wizard URLs in SkipList.
// For every region report of a surviving message one row is emitted with the
// summed shows/clicks of that region.
struct TExtractQueriesMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(TrieStream)

public:
    // Returns true for very large groups that are excluded from processing.
    static bool IsMonsterGroup(const TString &group) {
        // The set is never modified after construction, hence const.
        static const THashSet<TString> MONSTERS_LIST = {
            "my.mail.ru",
            "ok.ru",
            "plus.google.com",
            "tr-tr.facebook.com",
            "twitter.com",
            "vk.com",
            "avito.ru",
            "facebook.com",
            "linkedin.com",
            "youtube.com",
        };
        return MONSTERS_LIST.contains(group);
    }

    TExtractQueriesMapper() = default;
    TExtractQueriesMapper(const TVector<char> &trieStream)
        : TrieStream(trieStream)
    {
    }

    // Rebuilds the job-side state from the serialized TrieStream.
    void Start(TWriter* /*writer*/) override {
        // data() is valid for an empty vector, whereas &TrieStream[0] is not.
        Trie.Init(TrieStream.data(), TrieStream.size());
        SkipList = {
            "http://maps.yandex.ru/geo_wizard",
            "http://yandex.com.tr/imageswizard", "http://yandex.by/imageswizard", "http://yandex.com/imageswizard",
            "http://yandex.kz/imageswizard", "http://yandex.ru/imageswizard", "http://yandex.ua/imageswizard",
            "http://yandex.by/video/videoblend", "http://yandex.com/video/videoblend", "http://yandex.com.tr/video/videoblend",
            "http://yandex.kz/video/videoblend", "http://yandex.ru/video/videoblend", "http://yandex.ua/video/videoblend",

            "https://maps.yandex.ru/geo_wizard",
            "https://yandex.by/imageswizard", "https://yandex.com.tr/imageswizard", "https://yandex.com/imageswizard",
            "https://yandex.kz/imageswizard", "https://yandex.ru/imageswizard", "https://yandex.ua/imageswizard",
            "https://yandex.by/video/videoblend", "https://yandex.com.tr/video/videoblend", "https://yandex.com/video/videoblend",
            "https://yandex.kz/video/videoblend", "https://yandex.ru/video/videoblend", "https://yandex.ua/video/videoblend",
        };
    }

    void Do(TReader *input, TWriter *output) override {
        // Single alternation of patterns; a query matching any of them is
        // treated as junk and skipped.
        static TDeque<TString> filters = {
            "<<"
            "|\\&\\&"
            "|\\~\\~"
            "|/\\+"
            "|/-"
            "|\\|"
            "|title:"
            "|url:"
            "|site:"
            "|inurl:"
            "|host:"
            "|rhost:"
            "|domain:"
            "|mime:"
            "|lang:"
            "|date:"
            "|cat:"
            "|^\\s*!"
            "|^\"\\s*!"
        };

        TBatchMatcher junkMatcher(filters);

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            const TString host = row[F_YAMR_KEY].AsString();
            const TString fixedHost = TString{NUtils::GetHost2vecDomain(host)};

            if (IsMonsterGroup(fixedHost)) {
                continue;
            }

            // The trie is queried with the reversed host, so every matched
            // prefix corresponds to a suffix of the original host name.
            TString rhost = host;
            rhost.reverse();

            bool found = false;
            for (auto it = MakePrefixIterator(Trie, rhost.data(), rhost.size()); it; ++it) {
                const TString owner = host.substr(host.size() - it.GetPrefixLen());
                if (NUtils::IsSubdomain(host, owner)) {
                    found = true;
                    break;
                }
            }

            if (!found) {
                continue;
            }

            proto::queries2::QueryMessage msg;
            // NOTE(review): the parse result is not checked, so a malformed
            // value is processed with whatever fields were decoded — confirm
            // whether such rows should be skipped instead.
            msg.ParseFromString(row[F_YAMR_VALUE].AsString());

            if (junkMatcher.Matches(msg.corrected_query())) {
                continue;
            }

            // The split parts are not used; the call only rejects URLs that
            // cannot be split.
            TString urlHost, urlPath;
            if (!NUtils::SplitUrl(msg.url(), urlHost, urlPath)) {
                continue;
            }

            if (SkipList.contains(msg.url())) {
                continue;
            }

            for (const auto &region : msg.reports_by_region()) {
                size_t regionClicks = 0;
                size_t regionShows = 0;
                for (const auto &position : region.position_info()) {
                    regionShows += position.shows_count();
                    regionClicks += position.clicks_count();
                }
                output->AddRow(NYT::TNode()
                   (NHost2Vec::F_HOST, fixedHost)
                   (F_IS_MOBILE, region.is_mobile())
                   (F_IS_PAD, region.is_pad())
                   (F_REGION_ID, region.region_id())
                   (F_SHOWS, regionShows)   //there can be filter msg.is_nav()?
                   (F_CLICKS, regionClicks) //there can be filter msg.is_nav()?
                   (F_TOTAL_SHOWS, regionShows)
                   (F_TOTAL_CLICKS, regionClicks)
                );
            }
        }
    }

public:
    TVector<char> TrieStream;       // serialized trie, shipped to the job via Y_SAVELOAD_JOB
    TCompactTrie<char> Trie;        // view over TrieStream, initialized in Start()
    THolder<TBatchMatcher> Matcher; // not used by the code in this struct
    THashSet<TString> SkipList;     // wizard URLs ignored by Do(), filled in Start()
};

REGISTER_MAPPER(TExtractQueriesMapper)

//ReduceBy F_HOST, F_IS_MOBILE, F_IS_PAD, F_REGION_ID
// Folds all rows of one key into a single row with summed counters.
// The output row is a copy of the first input row with the four counter
// columns overwritten by the sums.
struct TExtractedQueriesCombiner : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(TReader *input, TWriter *output) override {
        // Snapshot of the first row: supplies every column that is not summed.
        NYT::TNode merged = input->GetRow();

        size_t showsSum = 0;
        size_t clicksSum = 0;
        size_t totalShowsSum = 0;
        size_t totalClicksSum = 0;

        while (input->IsValid()) {
            const NYT::TNode &current = input->GetRow();
            showsSum += current[F_SHOWS].AsUint64();
            clicksSum += current[F_CLICKS].AsUint64();
            totalShowsSum += current[F_TOTAL_SHOWS].AsUint64();
            totalClicksSum += current[F_TOTAL_CLICKS].AsUint64();
            input->Next();
        }

        merged
            (F_SHOWS, showsSum)
            (F_CLICKS, clicksSum)
            (F_TOTAL_SHOWS, totalShowsSum)
            (F_TOTAL_CLICKS, totalClicksSum);
        output->AddRow(merged);
    }
};

REGISTER_REDUCER(TExtractedQueriesCombiner)

//ReduceBy F_HOST
// Joins the host->groups table (input 0) with the extracted queries (input 1).
// Output 0 receives every query row once per group hash collected so far for
// the host; output 1 receives one row per host with its total clicks, written
// only when that total is positive.
struct TJoinGroupsReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override {
        const ui32 INPUT_TABLENO_GROUPS     = 0;
        const ui32 INPUT_TABLENO_QUERIES    = 1;
        const ui32 OUTPUT_TABLENO_GROUPS    = 0;
        const ui32 OUTPUT_TABLENO_COUNTERS  = 1;

        const NYT::TNode hostNode = input->GetRow()[NHost2Vec::F_HOST];

        TVector<ui32> groupHashes;
        size_t clicksTotal = 0;

        while (input->IsValid()) {
            NYT::TNode current = input->GetRow();
            const ui32 tableNo = input->GetTableIndex();

            if (tableNo == INPUT_TABLENO_GROUPS) {
                // Group hashes gathered here are applied to the query rows
                // that are read after them within the same key.
                NHost2Vec::GetGroupsHashes(current[NHost2Vec::F_GROUPS].AsString(), groupHashes);
            } else if (tableNo == INPUT_TABLENO_QUERIES) {
                clicksTotal += current[F_TOTAL_CLICKS].AsUint64();
                for (const ui32 hash : groupHashes) {
                    current(F_GROUP_ID, hash);
                    output->AddRow(current, OUTPUT_TABLENO_GROUPS);
                }
            }

            input->Next();
        }

        if (clicksTotal == 0) {
            return;
        }

        NYT::TNode counters;
        counters
            (NHost2Vec::F_HOST, hostNode)
            (F_TOTAL_CLICKS, clicksTotal);
        output->AddRow(counters, OUTPUT_TABLENO_COUNTERS);
    }
};

REGISTER_REDUCER(TJoinGroupsReducer)

//ReduceBy F_GROUP_ID
// Builds the rivals report for one group.
//
// All rows of the key belong to one group hash, which is resolved to a group
// name through HashToGroup. Rows of the group itself feed the "Owner*"
// statistics. Rows of hosts that are not subdomains of the group feed the
// "Rivals*" statistics, but only when clicks <= shows and the host's total
// clicks (HostClicks) lie strictly between groupClicks / 5 and groupClicks * 5.
// One output row is written for every host stored in Trie under the group
// whose host2vec domain equals the group.
struct TProcessGroupsReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(HashToGroup, TrieStream, HostClicks)

public:
    TProcessGroupsReducer() = default;
    TProcessGroupsReducer(const THashMap<ui32, TString> &hashToGroup, const TVector<char> &trieStream, const THashMap<TString, size_t> &hostClicks)
        : HashToGroup(hashToGroup)
        , TrieStream(trieStream)
        , HostClicks(hostClicks)
    {
    }

    // Rebuilds the trie view from the serialized TrieStream.
    void Start(TWriter* /*writer*/) override {
        // data() is valid for an empty vector, whereas &TrieStream[0] is not.
        Trie.Init(TrieStream.data(), TrieStream.size());
    }

    void Do(TReader *input, TWriter *output) override {
        // Accumulator of clicks/shows over a set of rows. CTR is summed per
        // row (rows with zero shows contribute nothing) and averaged over the
        // number of added rows.
        struct TStat {
            float GetAvgClicks() const {
                if (Count > 0) {
                    return static_cast<float>(Clicks) / static_cast<float>(Count);
                }
                return 0;
            }

            float GetAvgShows() const {
                if (Count > 0) {
                    return static_cast<float>(Shows) / static_cast<float>(Count);
                }
                return 0;
            }

            float GetAvgCTR() const {
                if (Count > 0) {
                    return CTR / static_cast<float>(Count);
                }
                return 0;
            }

            float GetSumClicks() const {
                return static_cast<float>(Clicks);
            }

            float GetSumShows() const {
                return static_cast<float>(Shows);
            }

            void Add(size_t clicks, size_t shows) {
                Clicks += clicks;
                Shows += shows;
                Count++;
                if (shows > 0) {
                    CTR += (static_cast<float>(clicks) / static_cast<float>(shows));
                }
            }

        private:
            size_t Clicks = 0;
            size_t Shows = 0;
            float CTR = 0.0;
            size_t Count = 0;
        };

        // Device type is the (IsMobile, IsPad) pair of a row.
        using TDeviceType = std::pair<bool, bool>;
        using TDeviceStats = THashMap<TDeviceType, TStat>;

        // Appends the five aggregate columns shared by every statistics node.
        const auto addStatColumns = [](NYT::TNode &node, const TStat &stat) {
            node
                (F_CTR, stat.GetAvgCTR())
                (F_CLICKS, stat.GetAvgClicks())
                (F_SHOWS, stat.GetAvgShows())
                (F_SUM_CLICKS, stat.GetSumClicks())
                (F_SUM_SHOWS, stat.GetSumShows());
        };

        // Builds a "*Total" node from one accumulator.
        const auto makeTotalNode = [&addStatColumns](const TStat &stat) {
            NYT::TNode node;
            addStatColumns(node, stat);
            return node;
        };

        // Builds a "*Devices" list with one node per device type.
        const auto makeDevicesNode = [&addStatColumns](const TDeviceStats &byDevice) {
            NYT::TNode list = NYT::TNode::CreateList();
            for (const auto &entry : byDevice) {
                NYT::TNode node;
                node
                    (F_IS_MOBILE, entry.first.first)
                    (F_IS_PAD, entry.first.second);
                addStatColumns(node, entry.second);
                list.Add(node);
            }
            return list;
        };

        const ui32 groupId = input->GetRow()[F_GROUP_ID].AsUint64();
        // at() throws for a hash missing from HashToGroup.
        const TString &group = HashToGroup.at(groupId);
        //bool isMainInGroup = (owner == group);

        if (TExtractQueriesMapper::IsMonsterGroup(group)) {
            return;
        }

        TDeviceStats devicesStats;
        TDeviceStats ownerDevicesStats;
        TStat stats;
        TStat ownerStats;

        // Group clicks = the largest per-host total among the hosts that are
        // subdomains of the group.
        bool fakeGroupClicks = false;
        size_t groupClicks = 0;
        for (const auto &obj : HostClicks) {
            const TString &host = obj.first;
            if (NUtils::IsSubdomain(host, group)) {
                groupClicks = Max(groupClicks, obj.second);
            }
        }

        // A group without clicks is treated as having one click; the
        // substitution is reported through Debug.Bounds.FakeGroupClicks.
        if (groupClicks == 0) {
            fakeGroupClicks = true;
            groupClicks = 1;
        }

        const size_t minGroupClicksBound = groupClicks / 5;
        const size_t maxGroupClicksBound = groupClicks * 5;

        TSet<TString> selectedRivals;
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            const TString host = row[NHost2Vec::F_HOST].AsString();
            const auto deviceType = std::make_pair(row[F_IS_MOBILE].AsBool(), row[F_IS_PAD].AsBool());
            const size_t clicks = row[F_CLICKS].AsUint64();
            const size_t shows = row[F_SHOWS].AsUint64();

            if (host == group) {
                ownerDevicesStats[deviceType].Add(clicks, shows);
                ownerStats.Add(clicks, shows);
            }

            //!!! because of the (clicks <= shows) filter, the rivals' click sum taken from Debug will sometimes differ from RivalsTotal.SumClicks
            if (!NUtils::IsSubdomain(host, group) && (clicks <= shows)) {
                //const ui32 regionId = row[F_REGION_ID].AsUint64();
                //devicesRegionsStats[isMobile][regionId].Add(clicks, shows);
                //regionsStats[regionId].Add(clicks, shows);

                const auto rivalClicksIt = HostClicks.find(host);
                if (rivalClicksIt != HostClicks.end()) {
                    const size_t rivalClicks = rivalClicksIt->second;
                    if (rivalClicks > minGroupClicksBound && rivalClicks < maxGroupClicksBound) {
                        devicesStats[deviceType].Add(clicks, shows);
                        stats.Add(clicks, shows);
                        selectedRivals.insert(host);
                    }
                }
            }
        }

        const NYT::TNode devicesNode = makeDevicesNode(devicesStats);
        const NYT::TNode ownerDevicesNode = makeDevicesNode(ownerDevicesStats);
        const NYT::TNode ownerTotalNode = makeTotalNode(ownerStats);
        const NYT::TNode rivalsTotalNode = makeTotalNode(stats);

        // Disabled per-region breakdown, kept for reference.
        /*
        NYT::TNode devicesRegionsNode = NYT::TNode::CreateList();
        for (auto deviceObj : devicesRegionsStats) {
            const bool isMobile = deviceObj.first;
            for (auto regionObj : deviceObj.second) {
                const ui32 regionId = regionObj.first;
                devicesRegionsNode.Add(NYT::TNode()
                    (F_IS_MOBILE, isMobile)
                    (F_REGION_ID, regionId)
                    (F_CTR, regionObj.second.GetAvgCTR())
                    (F_CLICKS, regionObj.second.GetAvgClicks())
                    (F_SHOWS, regionObj.second.GetAvgShows())
                );
            }
        }

        NYT::TNode regionsNode = NYT::TNode::CreateList();
        for (auto regionObj : regionsStats) {
            const ui32 regionId = regionObj.first;
            regionsNode.Add(NYT::TNode()
                (F_REGION_ID, regionId)
                (F_CTR, regionObj.second.GetAvgCTR())
                (F_CLICKS, regionObj.second.GetAvgClicks())
                (F_SHOWS, regionObj.second.GetAvgShows())
            );
        }
        */

        NYT::TNode debugNode;
        debugNode("Bounds", NYT::TNode()
            ("GroupClicks", groupClicks)
            ("Min", minGroupClicksBound)
            ("Max", maxGroupClicksBound)
            ("FakeGroupClicks", fakeGroupClicks)
        );

        NYT::TNode debugNodeRivals = NYT::TNode::CreateList();
        for (const TString &rivalHost : selectedRivals) {
            debugNodeRivals.Add(NYT::TNode()
                (rivalHost, HostClicks.at(rivalHost))
            );
        }
        debugNode("SelectedRivals", debugNodeRivals);

        // The trie is keyed by reversed host names, so the tails of the
        // reversed group enumerate the stored hosts that end with the group.
        TString rGroup = group;
        rGroup.reverse();
        TCompactTrie<char> groupTrie = Trie.FindTails(rGroup);

        for (const auto &entry : groupTrie) {
            const TString &suffix = entry.first;
            const TString rHost = rGroup + suffix;
            TString host = rHost;
            host.reverse();
            const TString fixedHost = TString{NUtils::GetHost2vecDomain(host)};
            if (fixedHost == group) {
                output->AddRow(NYT::TNode()
                    (NHost2Vec::F_HOST, host)
                    (NHost2Vec::F_GROUP, group)
                    (F_SELECTED_RIVALS, selectedRivals.size())
                    ("Debug", debugNode)
                    ("OwnerDevices", ownerDevicesNode)
                    ("OwnerTotal", ownerTotalNode)
                    ("RivalsDevices", devicesNode)
                    ("RivalsTotal", rivalsTotalNode)
                    //("DevicesRegions", devicesRegionsNode)
                    //("Regions", regionsNode)
                );
            }
        }
    }

public:
    THashMap<ui32, TString> HashToGroup;   // group hash -> group name
    TVector<char> TrieStream;              // serialized trie, shipped to the job via Y_SAVELOAD_JOB
    TCompactTrie<char> Trie;               // view over TrieStream, initialized in Start()
    THashMap<TString, size_t> HostClicks;  // host -> total clicks
};

REGISTER_REDUCER(TProcessGroupsReducer)

// Compares the tables found under srcPrefix and dstPrefix by table name
// (NYTUtils::GetTableName of the full path).
//   toProcess - source tables that have no destination counterpart;
//   toDelete  - destination tables that have no source counterpart.
// Both output containers are overwritten.
void GetTablesDiffByPrefix(NYT::IClientBasePtr client, const TString &srcPrefix, const TString &dstPrefix, TDeque<NYTUtils::TTableInfo> &toDelete, TDeque<NYTUtils::TTableInfo> &toProcess) {
    // Table descriptor ordered by its table name only.
    struct TNamedTable {
        TNamedTable() = default;
        TNamedTable(const NYTUtils::TTableInfo &info)
            : Info(info)
            , ShortName(NYTUtils::GetTableName(Info.Name))
        {
        }
        bool operator<(const TNamedTable &other) const {
            return ShortName < other.ShortName;
        }
    public:
        const NYTUtils::TTableInfo Info;
        const TString ShortName;
    };

    // Lists the tables under a prefix and indexes them by table name.
    const auto listByName = [&client](const TString &prefix) {
        TDeque<NYTUtils::TTableInfo> listed;
        NYTUtils::GetTableList(client, prefix, listed, Max<size_t>());
        TSet<TNamedTable> indexed;
        for (const NYTUtils::TTableInfo &info : listed) {
            indexed.insert(TNamedTable(info));
        }
        return indexed;
    };

    // Replaces the content of 'target' with the table infos of 'picked'.
    const auto exportTo = [](const TDeque<TNamedTable> &picked, TDeque<NYTUtils::TTableInfo> &target) {
        target.clear();
        for (const TNamedTable &item : picked) {
            target.push_back(item.Info);
        }
    };

    const TSet<TNamedTable> dstByName = listByName(dstPrefix);
    const TSet<TNamedTable> srcByName = listByName(srcPrefix);

    TDeque<TNamedTable> onlyInDst;
    TDeque<TNamedTable> onlyInSrc;
    std::set_difference(srcByName.begin(), srcByName.end(), dstByName.begin(), dstByName.end(), std::back_inserter(onlyInSrc));
    std::set_difference(dstByName.begin(), dstByName.end(), srcByName.begin(), srcByName.end(), std::back_inserter(onlyInDst));

    exportTo(onlyInDst, toDelete);
    exportTo(onlyInSrc, toProcess);
}

// Reads the per-host click counters from sourceTable into hostClicks.
// The result is collected in a scratch map and swapped in only after the
// whole table has been read; previous content of hostClicks is discarded.
void ReadHostCounters(NYT::IClientBasePtr client, const TString &sourceTable, THashMap<TString, size_t> &hostClicks) {
    THashMap<TString, size_t> collected;

    auto rows = client->CreateTableReader<NYT::TNode>(sourceTable);
    while (rows->IsValid()) {
        const NYT::TNode &record = rows->GetRow();
        const TString hostName = record[NHost2Vec::F_HOST].AsString();
        const size_t clickCount = record[F_TOTAL_CLICKS].AsUint64();
        // A later row for the same host overwrites an earlier one.
        collected[hostName] = clickCount;
        rows->Next();
    }

    hostClicks.swap(collected);
}

// Brings the rivals report directory in sync with the converted query tables:
// reports without a source table are dropped, and for every source table
// without a report the extract -> join -> process pipeline is run.
// Tables are processed on a pool of 4 threads, each inside its own
// transaction; a yexception raised by a table's pipeline is logged and does
// not stop the other tables.
void TaskBuildQueries(NYT::IClientBasePtr clientQueries, const TConfig &config, const NHost2Vec::TTableConfig &host2VecTables, const THashSet<TString> &webmasterHosts) {
    THashSet<TString> modelHosts;
    LoadHosts(clientQueries, host2VecTables.HostsHash, NHost2Vec::F_HOST, modelHosts);

    TVector<char> modelTrie;
    BuildHostsTrie(modelHosts, modelTrie);

    TVector<char> webmasterTrie;
    BuildHostsTrie(webmasterHosts, webmasterTrie);

    THashMap<ui32, TString> groupByHash;
    NHost2Vec::LoadGroupsHashes(clientQueries, host2VecTables.GroupsHash, groupByHash);

    TDeque<NYTUtils::TTableInfo> obsolete, pending;
    GetTablesDiffByPrefix(clientQueries, config.TABLE_SOURCE_QUERIES_CONVERTED_PREFIX, config.TABLE_RIVALS_QUERIES_REPORT_ROOT, obsolete, pending);

    for (const NYTUtils::TTableInfo &stale : obsolete) {
        LOG_INFO("rivals queries, delete = %s", stale.Name.data());
        TOpRunner(clientQueries).Drop(stale.Name);
    }

    THolder<IThreadPool> queue(CreateThreadPool(4));

    // Pending tables are scheduled in reverse of the order they were listed in.
    for (auto it = pending.rbegin(); it != pending.rend(); ++it) {
        const NYTUtils::TTableInfo &table = *it;
        const TString tableName = NYTUtils::GetTableName(table.Name);
        const TString tmpExtractedQueries   = NYTUtils::JoinPath(config.TABLE_RIVALS_QUERIES_TMP_EXTRACTED_QUERIES, tableName);
        const TString tmpRawGroups          = NYTUtils::JoinPath(config.TABLE_RIVALS_QUERIES_TMP_RAW_GROUPS, tableName);
        const TString tmpHostCounters       = NYTUtils::JoinPath(config.TABLE_RIVALS_QUERIES_TMP_HOST_COUNTERS, tableName);
        const TString groupsReport          = NYTUtils::JoinPath(config.TABLE_RIVALS_QUERIES_REPORT_ROOT, tableName);
        LOG_INFO("rivals queries, process = %s", table.Name.data());

        // Per-table values are captured by copy; the shared read-only state
        // is captured by reference and outlives the tasks because the pool is
        // stopped before this function returns.
        queue->SafeAddFunc([table, tmpExtractedQueries, tmpRawGroups, tmpHostCounters, groupsReport, host2VecTables,
                            &clientQueries, &groupByHash, &modelTrie, &webmasterTrie]() {
            try {
                NYT::ITransactionPtr tx = clientQueries->StartTransaction();

                // Stage 1: extract per-region counters, then attach group ids
                // and compute per-host click totals.
                NWebmaster::TOpRunner(tx)
                    .InputNode(table.Name)
                    .OutputNode(tmpExtractedQueries)
                    .MemoryLimit(MEMORY_LIMIT_4GB)
                    .UseTmpfs()
                    .ReduceBy(NHost2Vec::F_HOST, F_IS_MOBILE, F_IS_PAD, F_REGION_ID)
                    .MapReduce(new TExtractQueriesMapper(modelTrie), new TExtractedQueriesCombiner, new TExtractedQueriesCombiner)
                    .SortBy(NHost2Vec::F_HOST)
                    .Sort(tmpExtractedQueries)

                    .InputNode(host2VecTables.HostGroupsHash)
                    .InputNode(tmpExtractedQueries)
                    .OutputNode(NYT::TRichYPath(tmpRawGroups).SortedBy(NHost2Vec::F_HOST))
                    .OutputNode(NYT::TRichYPath(tmpHostCounters).SortedBy(NHost2Vec::F_HOST))
                    .JobCount(10000)
                    .ReduceBy(NHost2Vec::F_HOST)
                    .Reduce(new TJoinGroupsReducer)
                    .SortBy(F_GROUP_ID)
                    .Sort(tmpRawGroups)
                    .Drop(tmpExtractedQueries)
                ;

                // The per-host totals are loaded locally and passed to the
                // final reducer as part of its state.
                THashMap<TString, size_t> clicksByHost;
                ReadHostCounters(tx, tmpHostCounters, clicksByHost);

                // Stage 2: build the per-group report and clean up temporaries.
                NWebmaster::TOpRunner(tx)
                    .InputNode(tmpRawGroups)
                    .OutputNode(groupsReport)
                    .ReduceBy(F_GROUP_ID)
                    .MemoryLimit(MEMORY_LIMIT_4GB)
                    .Reduce(new TProcessGroupsReducer(groupByHash, webmasterTrie, clicksByHost))
                    .Drop(tmpRawGroups)
                    .Drop(tmpHostCounters)
                    .SortBy(NHost2Vec::F_HOST)
                    .Sort(groupsReport)
                ;

                tx->Commit();
            } catch (const yexception &e) {
                LOG_ERROR("rivals queries, unable to build queries: %s", e.what());
            }
        });
    }

    queue->Stop();
}

// Number of whole days elapsed between the given Unix timestamp (seconds) and now.
static int AgeDays(time_t timestamp) {
    const auto elapsed = Now() - TInstant::Seconds(timestamp);
    const auto wholeHours = elapsed.Hours();
    return wholeHours / 24;
}

// Calls NYTUtils::CreatePath for every directory the rivals-queries tasks work with.
void CreatePaths(NYT::IClientPtr clientQueries, const TConfig &config) {
    const auto ensure = [&clientQueries](const auto &path) {
        NYTUtils::CreatePath(clientQueries, path);
    };

    ensure(config.TABLE_RIVALS_QUERIES_REPORT_ROOT);
    ensure(config.TABLE_RIVALS_QUERIES_HOST2VEC_ROOT);
    ensure(config.TABLE_RIVALS_QUERIES_TMP_EXTRACTED_QUERIES);
    ensure(config.TABLE_RIVALS_QUERIES_TMP_RAW_GROUPS);
    ensure(config.TABLE_RIVALS_QUERIES_TMP_HOST_COUNTERS);
}

void RivalsCountQueries(const TConfig &config) {
    NYT::IClientPtr clientQueries = NYT::CreateClient(config.MR_SERVER_HOST_QUERIES);
    CreatePaths(clientQueries, config);

    THashSet<TString> webmasterHosts;
    if (!NYTUtils::LoadWebmastersHosts(clientQueries, config.TABLE_SOURCE_WEBMASTER_HOSTS, webmasterHosts)) {
        ythrow yexception() << "webmaster hosts table is empty";
    }

    NHost2Vec::TTableConfig host2vecTables(
        config.TABLE_SOURCE_WEBMASTER_HOSTS,
        config.TABLE_RIVALS_QUERIES_HOST2VEC_HOST2VEC,
        config.TABLE_RIVALS_QUERIES_HOST2VEC_HOST_GROUPS,
        config.TABLE_RIVALS_QUERIES_HOST2VEC_HOSTS,
        config.TABLE_RIVALS_QUERIES_HOST2VEC_GROUPS
    );

    TaskBuildQueries(clientQueries, config, host2vecTables, webmasterHosts);
}

void RivalsCountQueriesUpdateHost2vec(const TConfig &config) {
    NYT::IClientPtr clientQueries = NYT::CreateClient(config.MR_SERVER_HOST_QUERIES);
    CreatePaths(clientQueries, config);

    time_t uploadTime = 0;
    try {
        uploadTime = NYTUtils::GetAttr(clientQueries, config.TABLE_RIVALS_QUERIES_HOST2VEC_HOST2VEC, ATTR_UPLOAD_TIME).AsInt64();
    } catch (yexception &e) {
        LOG_WARN("rival queries, unable to get host2vec upload time: %s", e.what());
    }

    NHost2Vec::TTableConfig host2vecTables(
        config.TABLE_SOURCE_WEBMASTER_HOSTS,
        config.TABLE_RIVALS_QUERIES_HOST2VEC_HOST2VEC,
        config.TABLE_RIVALS_QUERIES_HOST2VEC_HOST_GROUPS,
        config.TABLE_RIVALS_QUERIES_HOST2VEC_HOSTS,
        config.TABLE_RIVALS_QUERIES_HOST2VEC_GROUPS
    );

    if (AgeDays(uploadTime) > 6) {
        NYT::ITransactionPtr tx = clientQueries->StartTransaction();
        NHost2Vec::TaskUpdateHost2vecGroups(tx, F_YAMR_KEY, host2vecTables);
        NYTUtils::SetAttr(tx, config.TABLE_RIVALS_QUERIES_HOST2VEC_HOST2VEC, ATTR_UPLOAD_TIME, Now().TimeT());
        tx->Commit();
    }
}

} //namespace NTrends1
} //namespace NWebmaster
