#pragma once

#include <library/cpp/containers/comptrie/comptrie_trie.h>

#include "config.h"
#include "batch_matcher.h"

namespace NWebmaster {

// Upper bound on a row size: 16 * 1024 * 1024.
// NOTE(review): presumably bytes (16 MiB); the place where the limit is enforced
// is not visible in this header — confirm at the point of use.
constexpr size_t MAX_ROW_SIZE = 16 * 1024 * 1024;

// String constants with the F_ prefix.
// NOTE(review): presumably the names of table columns read/written by the jobs
// declared below — confirm against the implementation file.
constexpr const char *F_QUERY                   = "Query";
constexpr const char *F_QUERY_ID                = "QueryId";
constexpr const char *F_SHOWS                   = "Shows";
constexpr const char *F_CLICKS                  = "Clicks";
constexpr const char *F_POSITION                = "Position";
constexpr const char *F_IS_NAV                  = "IsNav";
constexpr const char *F_REGION_ID               = "RegionId";
constexpr const char *F_URL                     = "Url";
constexpr const char *F_URL_ID                  = "UrlId";
constexpr const char *F_RIVALS_CLICKS           = "RivalsClicks";
constexpr const char *F_RIVALS_SHOWS            = "RivalsShows";
constexpr const char *F_RIVALS_UNIQ_BY_CLICKS   = "UniqRivalsByClicks";
constexpr const char *F_RIVALS_UNIQ_BY_SHOWS    = "UniqRivalsByShows";
constexpr const char *F_TITLE                   = "Title";
constexpr const char *F_METADESCR               = "MetaDescription";
constexpr const char *F_HASH_SOURCE             = "HashSource";
constexpr const char *F_URLS_HASHES             = "UrlsHashes";

// Accessors that return const references to string containers: two hash sets
// (URL and host skip lists) and one deque (query junk filters).
// NOTE(review): how the containers are populated and how long the returned
// references stay valid is defined in the implementation — not visible here.
const THashSet<TString> &GetUrlsSkipList();
const THashSet<TString> &GetHostsSkipList();
const TDeque<TString> &GetQueriesJunkFilters();

// Bundle of eight strings that is passed by const reference to the task
// functions declared at the end of this header.
// NOTE(review): judging by the struct name the strings are table names/paths —
// confirm against the callers.
struct TRivalsTables {
    TRivalsTables() = default;

    // Copies each argument into the const member of the same (capitalized) name.
    TRivalsTables(
        const TString &extractedQueries,
        const TString &extractedQueriesFilter,
        const TString &enrichedQueries,
        const TString &idsQueries,
        const TString &idsUrls,
        const TString &intmRawGroups,
        const TString &intmEnrichedGroups,
        const TString &enrichedGroups
    )
        : ExtractedQueries(extractedQueries),
          ExtractedQueriesFilter(extractedQueriesFilter),
          EnrichedQueries(enrichedQueries),
          IdsQueries(idsQueries),
          IdsUrls(idsUrls),
          IntmRawGroups(intmRawGroups),
          IntmEnrichedGroups(intmEnrichedGroups),
          EnrichedGroups(enrichedGroups)
    {}

    // Every member is const: it is set once by a constructor and cannot be
    // reassigned afterwards.
    const TString ExtractedQueries;
    const TString ExtractedQueriesFilter;
    const TString EnrichedQueries;
    const TString IdsQueries;
    const TString IdsUrls;
    const TString IntmRawGroups;
    const TString IntmEnrichedGroups;
    const TString EnrichedGroups;
};

// Mapper declared with TYaMRRow input rows and TNode output rows.
//
// Y_SAVELOAD_JOB lists the members that make up the saved job state:
// TrieStream, PeriodsConfig, Queries and PositionOffset. Trie, Matcher,
// SkipListUrls and SkipListHosts are not in that list.
// NOTE(review): the unlisted members are presumably rebuilt in Start() —
// confirm in the implementation file.
struct TMapExtractDirectQueries : public NYT::IMapper<NYT::TTableReader<NYT::TYaMRRow>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(TrieStream, PeriodsConfig, Queries, PositionOffset)

public:
    TMapExtractDirectQueries() = default;
    TMapExtractDirectQueries(const TVector<char> &trieStream, const TDeque<time_t> &periodsConfig,
                             const TDeque<TString> &queries, int positionOffset);

public:
    // Overrides of the mapper interface. The writer argument of Start() is
    // deliberately left unnamed in the declaration.
    void Start(TWriter* /*writer*/) override;
    void Do(TReader *input, TWriter *output) override;

    // Non-virtual helper taking a host name, a parsed QueryMessage and the
    // output writer.
    void ExtractForGroups(const TString &host, const proto::queries2::QueryMessage &msg, TWriter *output);

public:
    TVector<char> TrieStream;
    TCompactTrie<char> Trie;
    TDeque<time_t> PeriodsConfig;
    TDeque<TString> Queries;
    THolder<TBatchMatcher> Matcher;
    THashSet<TString> SkipListUrls;
    THashSet<TString> SkipListHosts;
    // Initialized in-class so that a default-constructed job never holds an
    // indeterminate value (same convention as TMapGroupQueries::ShardNo).
    int PositionOffset = 0;
};

// Mapper declared with TYaMRRow input rows and TNode output rows.
//
// Y_SAVELOAD_JOB lists the members that make up the saved job state:
// TrieStream, PeriodsConfig, Queries and PositionOffset. Trie, Matcher,
// SkipListUrls and SkipListHosts are not in that list.
// NOTE(review): the unlisted members are presumably rebuilt in Start() —
// confirm in the implementation file.
struct TMapCountDirectQueries : public NYT::IMapper<NYT::TTableReader<NYT::TYaMRRow>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(TrieStream, PeriodsConfig, Queries, PositionOffset)

public:
    TMapCountDirectQueries() = default;
    TMapCountDirectQueries(const TVector<char> &trieStream, const TDeque<time_t> &periodsConfig,
                           const TDeque<TString> &queries, int positionOffset);

public:
    // Overrides of the mapper interface. The writer argument of Start() is
    // deliberately left unnamed in the declaration.
    void Start(TWriter* /*writer*/) override;
    void Do(TReader *input, TWriter *output) override;

public:
    TVector<char> TrieStream;
    TCompactTrie<char> Trie;
    TDeque<time_t> PeriodsConfig;
    TDeque<TString> Queries;
    THolder<TBatchMatcher> Matcher;
    THashSet<TString> SkipListUrls;
    THashSet<TString> SkipListHosts;
    // Initialized in-class so that a default-constructed job never holds an
    // indeterminate value (same convention as TMapGroupQueries::ShardNo).
    int PositionOffset = 0;
};

// Stateless reducer (no data members) with TNode input and output rows.
// NOTE(review): the name suggests it is used as a combiner for extracted
// queries — confirm at the call site; the reduce key is not visible here.
struct TCombineExtractedQueries : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override;
};

// Stateless reducer (no data members) with TNode input and output rows.
// NOTE(review): the reduce key and the aggregation performed are defined in
// the implementation file — not visible here.
struct TReduceExtractedQueries : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override;
};

// Stateless reducer (no data members) with TNode input and output rows.
struct TReduceUnique : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    // Reduce by any key.
    // NOTE(review): presumably emits one row per key group, whatever key the
    // caller reduces by — confirm in the implementation.
    void Do(TReader *input, TWriter *output) override;
};

// Mapper with TNode input and output rows.
//
// Y_SAVELOAD_JOB lists the members that make up the saved job state:
// TrieStream, OwnerToGroups, HashToGroup and ShardNo. Trie is not in that list.
// NOTE(review): Trie is presumably rebuilt from TrieStream in Start() —
// confirm in the implementation file.
struct TMapGroupQueries : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(TrieStream, OwnerToGroups, HashToGroup, ShardNo)

public:
    static const int SHARDS_COUNT = 4;

    TMapGroupQueries() = default;
    TMapGroupQueries(
        const TVector<char> &trieStream,
        const THashMap<TString, TVector<ui32>> &ownerToGroups,
        const THashMap<ui32, TString> &hashToGroup,
        int shardNo
    );

    // Overrides of the mapper interface. The writer argument of Start() is
    // deliberately left unnamed in the declaration.
    void Start(TWriter* /*writer*/) override;
    void Do(TReader *input, TWriter *output) override;

    // Job state. ShardNo defaults to 0 for a default-constructed job.
    TVector<char> TrieStream;
    TCompactTrie<char> Trie;
    THashMap<TString, TVector<ui32>> OwnerToGroups;
    THashMap<ui32, TString> HashToGroup;
    int ShardNo = 0;
};

// Reducer with TNode input and output rows, parameterized by one flag.
// Y_SAVELOAD_JOB lists FilterQueries as the only member of the saved job state.
struct TReduceProcessGroups : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(FilterQueries)

public:
    TReduceProcessGroups() = default;
    // Stores the flag; left non-explicit to keep existing call sites compiling.
    TReduceProcessGroups(bool filterQueries)
        : FilterQueries(filterQueries)
    {
    }

    //ReduceBy Group, QueryId, RegionId
    void Do(TReader *input, TWriter *output) override;

public:
    // Initialized in-class so that a default-constructed job never holds an
    // indeterminate bool (reading one is undefined behaviour).
    bool FilterQueries = false;
};

// Stateless reducer (no data members) with TNode input and output rows.
struct TReduceJoinQueries : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    // Builds and returns a TNode from the given row.
    // NOTE(review): which fields are copied or renamed is defined in the
    // implementation file — not visible here.
    NYT::TNode GetOutputRow(const NYT::TNode &row);

    //reduce by QueryId
    void Do(TReader *input, TWriter *output) override;
};

// Stateless reducer (no data members) with TNode input and output rows.
struct TReduceJoinTextContent : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    // Builds and returns a TNode from the given row.
    // NOTE(review): which fields are copied or renamed is defined in the
    // implementation file — not visible here.
    NYT::TNode GetOutputRow(const NYT::TNode &row);

    //reduce by Hash
    void Do(TReader *input, TWriter *output) override;
};

// Stateless reducer (no data members) with TNode input and output rows.
// NOTE(review): what "wrapping" does to the text content is defined in the
// implementation file — not visible here.
struct TReduceWrapTextContent : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    //reduce by Hash
    void Do(TReader *input, TWriter *output) override;
};

// Free functions implemented outside this header. The notes below describe only
// what the signatures show; behaviour must be confirmed in the implementation.

// Returns a table schema by value (for the enriched groups table, per the name).
NYT::TTableSchema GetEnrichedGroupsSchema();
// Takes a client, two table names (rawGroups, enrichedGroups) and the rivals
// tables bundle. NOTE(review): presumably reads rawGroups and writes
// enrichedGroups — confirm.
void EnrichGroups(NYT::IClientBasePtr client, const TString &rawGroups, const TString &enrichedGroups, const TRivalsTables &rivalsTables);
// `dest` is a non-const reference, i.e. an output parameter. NOTE(review):
// presumably receives a serialized trie built from `words`; the TrieStream
// members of the mappers above have the same TVector<char> type — confirm.
void BuildHostsTrie(const THashSet<TString> &words, TVector<char> &dest);
// `hosts` is a non-const reference, i.e. an output (or in-out) parameter.
// NOTE(review): presumably filled from `column` of `sourceTable` — confirm.
void LoadHosts(NYT::IClientBasePtr client, const TString &sourceTable, const TString &column, THashSet<TString> &hosts);
// NOTE(review): presumably merges tables selected by `prefix` into `dest` — confirm.
void MergeEnrichedGroups(NYT::IClientBasePtr client, const TString &prefix, const TString &dest);
// Task entry points; each takes the client, the config and the rivals tables bundle.
void TaskBuildQueries(NYT::IClientBasePtr clientQueries, const TConfig &config, const NHost2Vec::TTableConfig &host2VecTables, const TRivalsTables &rivalsTables,
                      int startPosition, bool filterQueries);
void TaskEnrichQueries(NYT::IClientBasePtr clientQueries, const TConfig &config, const TRivalsTables &rivalsTables);
void TaskGroupQueries(NYT::IClientBasePtr clientQueries, const TConfig &config, const NHost2Vec::TTableConfig &host2VecTables, const TRivalsTables &rivalsTables,
                      bool filterQueries);

} //namespace NWebmaster
