#include <util/datetime/cputimer.h>
#include <util/generic/vector.h>
#include <util/generic/deque.h>
#include <util/generic/hash_set.h>
#include <util/thread/pool.h>
#include <util/string/reverse.h>

#include <library/cpp/containers/comptrie/comptrie.h>
#include <library/cpp/containers/comptrie/prefix_iterator.h>
#include <library/cpp/geobase/lookup.hpp>

#include <mapreduce/yt/interface/client.h>
#include <geobase/include/lookup.hpp>

#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/transfer_manager.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>

#include "batch_matcher.h"
#include "config.h"
#include "host2vec.h"
#include "monitor.h"
#include "rivals.h"
#include "query_utils.h"
#include "source_tables.h"
#include "task_autocampaign.h"

// Names of YT table columns used by the autocampaign task in this file.
// Key column of the autocampaign config table; also passed to NHost2Vec::TaskUpdateHost2vecGroups.
const char *F_DOMAIN            = "Domain";
// Read from the config table rows and written to the enriched groups rows.
const char *F_PARENT_REGION_ID  = "ParentRegionId";
// Boolean flag read from the config table rows.
const char *F_VITAL_QUERY       = "VitalQuery";
// Columns added to the enriched groups rows by the filtered-queries reducer.
const char *F_FILTERED_FLAGS    = "FilteredQueryFlags";
const char *F_FILTERED_QUERY    = "FilteredQuery";

namespace NWebmaster {
namespace NAutoCampaign {

// One record of the autocampaign config table. Serializable through
// Y_SAVELOAD_DEFINE so that it can be stored inside the state of YT jobs.
struct TAutoCampaignConfigRecord {
    Y_SAVELOAD_DEFINE(ParentRegionId, VitalQuery)

    TAutoCampaignConfigRecord() = default;

    // Builds a record from a config table row: reads the ParentRegionId (uint64)
    // and VitalQuery (bool) columns. Explicit, so a TNode is never converted to
    // a config record by accident.
    explicit TAutoCampaignConfigRecord(const NYT::TNode &row)
        // The column is stored as uint64 while the field is 32 bit wide;
        // the narrowing is spelled out instead of happening implicitly.
        : ParentRegionId(static_cast<ui32>(row[F_PARENT_REGION_ID].AsUint64()))
        , VitalQuery(row[F_VITAL_QUERY].AsBool())
    {
    }

public:
    // Compared against the id returned by the geobase lookup in the parent-region reducer.
    ui32 ParentRegionId = 0;
    // When set, the group-queries mapper drops rows flagged as navigational.
    bool VitalQuery = false;
};

// Map stage of the group-queries MapReduce. For every input row the host is
// matched against a trie of reversed owner names; for each matching owner the
// row is re-emitted once per owner group that belongs to this shard and has an
// autocampaign config entry.
//
// Job state (Y_SAVELOAD_JOB): raw trie bytes, owner -> group ids, group id ->
// group name, autocampaign config and the shard number. The trie itself is not
// serialized; it is re-attached to TrieStream in Start().
struct TAutoCompaignGroupQueriesMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(TrieStream, OwnerToGroups, HashToGroup, AutoCampaignConfig, ShardNo)

public:
    // Number of shards the group ids are split into (groupId % SHARDS_COUNT).
    const static int SHARDS_COUNT = 2;

    TAutoCompaignGroupQueriesMapper() = default;
    TAutoCompaignGroupQueriesMapper(const TVector<char> &trieStream, const THashMap<TString, TVector<ui32>> &ownerToGroups,
        const THashMap<ui32, TString> &hashToGroup, const THashMap<TString, TAutoCampaignConfigRecord> &acConfig, int shardNo
    )
        : TrieStream(trieStream)
        , OwnerToGroups(ownerToGroups)
        , HashToGroup(hashToGroup)
        , AutoCampaignConfig(acConfig)
        , ShardNo(shardNo)
    {
    }

    void Start(TWriter* /*writer*/) override {
        // data() is well defined for an empty vector, unlike &TrieStream[0].
        Trie.Init(TrieStream.data(), TrieStream.size());
    }

    // Emits rows with: group, query id (FNV hash of the query), region id,
    // position, host, shows, clicks, url id (FNV hash of the url) and the
    // navigational flag.
    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            const TString host = row[NHost2Vec::F_HOST].AsString();
            const TString query = row[F_QUERY].AsString();
            const TString url = row[F_URL].AsString();
            const bool isNav = row[F_IS_NAV].AsBool();

            // The trie is walked with the reversed host, so a matched prefix of
            // length N corresponds to the last N characters of the host.
            TString rhost = host;
            ReverseInPlace(rhost);

            const ui64 queryId = FnvHash<ui64>(query.data(), query.size());
            const ui64 urlId = FnvHash<ui64>(url.data(), url.size());

            for (auto it = MakePrefixIterator(Trie, rhost.data(), rhost.size()); it; ++it) {
                const TString owner = host.substr(host.size() - it.GetPrefixLen());
                if (!NUtils::IsSubdomain(host, owner)) {
                    continue;
                }

                const auto groupsIt = OwnerToGroups.find(owner);
                if (groupsIt == OwnerToGroups.end()) {
                    continue;
                }

                for (const ui32 groupId : groupsIt->second) {
                    if ((groupId % SHARDS_COUNT) != ShardNo) {
                        continue;
                    }

                    // Look the name up without operator[]: an unknown id must not
                    // insert an empty group name into the job state.
                    const auto nameIt = HashToGroup.find(groupId);
                    if (nameIt == HashToGroup.end()) {
                        continue;
                    }
                    const TString &group = nameIt->second;

                    const auto cfgIt = AutoCampaignConfig.find(group);
                    if (cfgIt == AutoCampaignConfig.end()) {
                        continue;
                    }

                    // Groups configured with VitalQuery do not receive navigational rows.
                    if (cfgIt->second.VitalQuery && isNav) {
                        continue;
                    }

                    output->AddRow(NYT::TNode()
                        (NHost2Vec::F_GROUP, group)
                        (F_QUERY_ID, queryId)
                        (F_REGION_ID, row[F_REGION_ID])
                        (F_POSITION, row[F_POSITION])
                        (NHost2Vec::F_HOST, host)
                        (F_SHOWS, row[F_SHOWS])
                        (F_CLICKS, row[F_CLICKS])
                        (F_URL_ID, urlId)
                        (F_IS_NAV, isNav)
                    );
                }
            }
        }
    }

public:
    TVector<char> TrieStream;                                        // serialized trie bytes; backing storage for Trie
    TCompactTrie<char> Trie;                                         // view over TrieStream, initialized in Start()
    THashMap<TString, TVector<ui32>> OwnerToGroups;                  // owner name -> ids of its groups
    THashMap<ui32, TString> HashToGroup;                             // group id -> group name
    THashMap<TString, TAutoCampaignConfigRecord> AutoCampaignConfig; // looked up by group name
    int ShardNo = 0;                                                 // shard handled by this job
};

REGISTER_MAPPER(TAutoCompaignGroupQueriesMapper)

// Joins source query rows (input table 0) with group rows (input table 1).
// Shows, clicks and the shows-weighted position are accumulated over the source
// rows; every group row that arrives after at least one source row is emitted
// with the totals accumulated so far.
struct TReduceJoinAutoCampaignSourceQueries : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    //reduce by Query, Host, RegionId, Position
    void Do(TReader *input, TWriter *output) override {
        const int SOURCE_QUERIES_TABLE = 0;
        const int GROUPS_TABLE = 1;

        size_t showsSum = 0;
        size_t clicksSum = 0;
        size_t positionTimesShows = 0;
        bool sourceSeen = false;

        while (input->IsValid()) {
            NYT::TNode current = input->GetRow();
            const auto tableIndex = input->GetTableIndex();

            if (tableIndex == SOURCE_QUERIES_TABLE) {
                const size_t rowShows = current[F_SHOWS].AsUint64();
                showsSum += rowShows;
                clicksSum += current[F_CLICKS].AsUint64();
                positionTimesShows += current[F_POSITION].AsUint64() * rowShows;
                sourceSeen = true;
            } else if (tableIndex == GROUPS_TABLE) {
                if (sourceSeen) {
                    // Without shows there is nothing to average over; the
                    // position is then reported as the maximal size_t value.
                    size_t averagePosition = Max<size_t>();
                    if (showsSum > 0) {
                        averagePosition = positionTimesShows / showsSum;
                    }
                    current(F_SHOWS, showsSum);
                    current(F_CLICKS, clicksSum);
                    current(F_POSITION, averagePosition);
                    output->AddRow(current);
                }
            } else {
                ythrow yexception() << "unknown input table";
            }

            input->Next();
        }
    }
};

REGISTER_REDUCER(TReduceJoinAutoCampaignSourceQueries)

// Reducer that resolves the region id of the first row through the geobase
// lookup (GetCountryId) and passes through only those rows whose group is
// configured with exactly that id, adding it as the ParentRegionId column.
struct TReduceEnrichAutoCampaignQueriesWithParentRegionId : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(Geobase, AutoCampaignConfig)

public:
    TReduceEnrichAutoCampaignQueriesWithParentRegionId() = default;
    TReduceEnrichAutoCampaignQueriesWithParentRegionId(const TString &geobase, const THashMap<TString, TAutoCampaignConfigRecord> &acConfig)
        : Geobase(geobase)
        , AutoCampaignConfig(acConfig)
    {
    }

    // The lookup object is not part of the saved job state; it is created from
    // the Geobase string when the job starts.
    void Start(TWriter* /*writer*/) override {
        Lookup.Reset(new NGeobase::TLookup(Geobase));
    }

    //ReduceBy RegionId
    void Do(TReader *input, TWriter *output) override {
        // The region is taken from the first row only and reused for all rows of this call.
        NYT::TNode current = input->GetRow();
        const auto regionId = static_cast<NGeobase::TId>(current[F_REGION_ID].AsUint64());
        const auto countryId = static_cast<ui64>(Lookup->GetCountryId(regionId));

        while (input->IsValid()) {
            current = input->GetRow();

            const TString groupName = current[NHost2Vec::F_GROUP].AsString();
            const auto entry = AutoCampaignConfig.find(groupName);
            const bool regionMatches = entry != AutoCampaignConfig.end()
                && entry->second.ParentRegionId == countryId;

            if (regionMatches) {
                output->AddRow(current(F_PARENT_REGION_ID, countryId));
            }

            input->Next();
        }
    }

public:
    TString Geobase;                     // argument for the NGeobase::TLookup constructor
    THolder<NGeobase::TLookup> Lookup;   // created in Start()
    THashMap<TString, TAutoCampaignConfigRecord> AutoCampaignConfig; // looked up by group name
};

REGISTER_REDUCER(TReduceEnrichAutoCampaignQueriesWithParentRegionId)

// Reducer that runs FilterAutoCampaignQuery on the query of the first row.
// If the filter rejects the query, nothing is emitted for this call; otherwise
// every row is emitted with the filter flags and the filtered query attached.
struct TReduceEnrichAutoCampaignQueriesWithFilteredQueries : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    //reduce by Query
    void Do(TReader *input, TWriter *output) override {
        NYT::TNode current = input->GetRow();
        const TString query = current[F_QUERY].AsString();

        TString cleanedQuery;
        int flags = 0;
        const bool accepted = FilterAutoCampaignQuery(query, cleanedQuery, flags);
        if (!accepted) {
            return;
        }

        while (input->IsValid()) {
            current = input->GetRow();
            current(F_FILTERED_FLAGS, flags);
            current(F_FILTERED_QUERY, cleanedQuery);
            output->AddRow(current);
            input->Next();
        }
    }
};

REGISTER_REDUCER(TReduceEnrichAutoCampaignQueriesWithFilteredQueries)

// Identity mapper: every input row is written to the output unchanged.
struct TCopyMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override {
        while (input->IsValid()) {
            output->AddRow(input->GetRow());
            input->Next();
        }
    }
};

REGISTER_MAPPER(TCopyMapper)

// Replaces the contents of acConfig with the records read from configTable.
// Each row is keyed by its "Domain" column; a later row with the same domain
// overwrites an earlier one. Logs the number of distinct domains loaded.
void LoadAutoCampaignConfig(NYT::IClientBasePtr client, const TString &configTable, THashMap<TString, TAutoCampaignConfigRecord> &acConfig) {
    // NOTE(review): shadows the file-level F_DOMAIN, which holds the same value.
    const char *const F_DOMAIN = "Domain";
    acConfig.clear();
    auto reader = client->CreateTableReader<NYT::TNode>(configTable);
    for (; reader->IsValid(); reader->Next()) {
        const NYT::TNode &row = reader->GetRow();
        const TString domain = row[F_DOMAIN].AsString();
        acConfig[domain] = TAutoCampaignConfigRecord(row);
    }
    // %lu expects unsigned long while size() yields size_t; convert explicitly
    // so the argument matches the format on every platform.
    LOG_INFO("autocampaign config loaded %lu domains", static_cast<unsigned long>(acConfig.size()));
}

// Builds the enriched groups table from the enriched queries table.
//
// The work is split into TAutoCompaignGroupQueriesMapper::SHARDS_COUNT shards
// that run concurrently on a thread pool. Each shard runs a MapReduce into its
// own raw groups table (inside a transaction), sorts it by query id and then
// calls EnrichGroups. Only when every shard has finished successfully are the
// per-shard results merged into rivalsTables.EnrichedGroups.
//
// NOTE(review): a failed shard is only logged; the function still returns
// normally and the merged table is left as it was, so callers cannot detect
// the failure from here.
void TaskGroupAutoCampaignQueries(NYT::IClientBasePtr clientQueries, const NHost2Vec::TTableConfig &host2VecTables,
    const TRivalsTables &rivalsTables, THashMap<TString, TAutoCampaignConfigRecord> &autoCampaignConfig)
{
    NYTUtils::CreatePath(clientQueries, rivalsTables.IntmRawGroups);
    NYTUtils::CreatePath(clientQueries, rivalsTables.IntmEnrichedGroups);

    THashMap<ui32, TString> hashToGroup;
    NHost2Vec::LoadGroupsHashes(clientQueries, host2VecTables.GroupsHash, hashToGroup);

    THashMap<TString, TVector<ui32>> ownerToGroups;
    TVector<char> trieStream;
    NHost2Vec::BuildHostToGroupTrie(clientQueries, host2VecTables.HostGroupsHash, ownerToGroups, trieStream);

    // One worker per shard, so the pool size follows the shard count.
    const int shardsCount = TAutoCompaignGroupQueriesMapper::SHARDS_COUNT;

    TAtomic processedShards = 0;
    THolder<IThreadPool> processQueue(CreateThreadPool(shardsCount));
    for (int shardNo = 0; shardNo < shardsCount; shardNo++) {
        // Everything except the loop counter is captured by reference: the pool
        // is stopped before this function returns, so the referenced objects
        // outlive the tasks, and the config map and table set are not copied
        // once per shard.
        processQueue->SafeAddFunc([&, shardNo]() {
            try {
                const TString rawGroups = NYTUtils::JoinPath(rivalsTables.IntmRawGroups, Sprintf("%02d", shardNo));
                const TString enrichedGroups = NYTUtils::JoinPath(rivalsTables.IntmEnrichedGroups, Sprintf("%02d", shardNo));

                NYT::ITransactionPtr tx = clientQueries->StartTransaction();
                TOpRunner(tx)
                    .InputNode(rivalsTables.EnrichedQueries)
                    .OutputNode(rawGroups)
                    .MapperMemoryLimit(MEMORY_LIMIT_6GB)
                    .ReducerMemoryLimit(MEMORY_LIMIT_4GB)
                    .UseTmpfsInMapper()
                    .UseTmpfsInReducer()
                    .ReduceBy(NHost2Vec::F_GROUP, F_QUERY_ID, F_REGION_ID)
                    .MapReduce(
                        new TAutoCompaignGroupQueriesMapper(trieStream, ownerToGroups, hashToGroup, autoCampaignConfig, shardNo),
                        new TReduceProcessGroups(false /*do not filter queries*/)
                     )
                    .SortBy(F_QUERY_ID)
                    .Sort(rawGroups)
                ;
                tx->Commit();

                EnrichGroups(clientQueries, rawGroups, enrichedGroups, rivalsTables);
                AtomicIncrement(processedShards);
            } catch (const yexception &e) {
                LOG_ERROR("direct, process group shard %d error: %s", shardNo, e.what());
            }
        });
    }
    processQueue->Stop();

    if (AtomicGet(processedShards) == shardsCount) {
        MergeEnrichedGroups(clientQueries, rivalsTables.IntmEnrichedGroups, rivalsTables.EnrichedGroups);
    } else {
        LOG_ERROR("direct, some shards were not completed");
    }
}

// Entry point of the autocampaign queries task.
//
// Does nothing (besides a warning) when config.CATALOGIA_ENABLED_BUILD_DATA is
// off. Otherwise: copies the source domains table, loads the autocampaign
// config, updates host2vec groups, builds/enriches/groups the queries, enriches
// the resulting groups table with ParentRegionId and filtered-query columns,
// and finally transfers it to the main cluster as the export table.
//
// The whole body is a function-try-block: a yexception is logged and swallowed,
// so the function returns normally in that case.
void TaskBuildAutoCampaignQueries(const TConfig &config) try {
    TSimpleTimer timer;
    if (!config.CATALOGIA_ENABLED_BUILD_DATA) {
        LOG_WARN("building autocampaign tables is disabled");
        return;
    }

    NYT::IClientPtr client = NYT::CreateClient(config.MR_SERVER_HOST_QUERIES);
    // Copy the source domains table into <autocampaign root>/domains with an identity mapper.
    const TString domainsTable = NYTUtils::JoinPath(config.TABLE_AUTOCAMPAIGN_ROOT, "domains"); //the YT scheduler currently has odd support for the JobCount spec on dynamic tables; NOTE(review): presumably the reason the source table is copied first - confirm
    TOpRunner(client)
        .InputNode(config.TABLE_AUTOCAMPAIGN_SOURCE_DOMAINS)
        .OutputNode(domainsTable)
        .Map(new TCopyMapper)
    ;

    // The config is read from the original source table, not from the copy made above.
    THashMap<TString, TAutoCampaignConfigRecord> acConfig;
    LoadAutoCampaignConfig(client, config.TABLE_AUTOCAMPAIGN_SOURCE_DOMAINS, acConfig);
    MonitorAutocampaignDomains(config.MONITOR_PERFORMANCE_SUFFIX, acConfig.size());

    // The copied domains table is the first argument of the host2vec table set.
    NHost2Vec::TTableConfig host2VecTables(
        domainsTable,
        config.TABLE_AUTOCAMPAIGN_SOURCE_HOST2VEC,
        config.TABLE_AUTOCAMPAIGN_SOURCE_HOST2VEC_HOST_GROUPS,
        config.TABLE_AUTOCAMPAIGN_SOURCE_HOST2VEC_HOSTS,
        config.TABLE_AUTOCAMPAIGN_SOURCE_HOST2VEC_GROUPS
    );

    NHost2Vec::TaskUpdateHost2vecGroups(client, F_DOMAIN, host2VecTables);

    TRivalsTables rivalsTables(
        config.TABLE_AUTOCAMPAIGN_EXTRACTED_QUERIES,
        config.TABLE_AUTOCAMPAIGN_EXTRACTED_QUERIES_FILTER,
        config.TABLE_AUTOCAMPAIGN_ENRICHED_QUERIES,
        config.TABLE_AUTOCAMPAIGN_IDS_QUERIES,
        config.TABLE_AUTOCAMPAIGN_IDS_URLS,
        config.TABLE_AUTOCAMPAIGN_INTM_RAW_GROUPS,
        config.TABLE_AUTOCAMPAIGN_INTM_ENRICHED_GROUPS,
        config.TABLE_AUTOCAMPAIGN_ENRICHED_GROUPS
    );

    TaskBuildQueries(client, config, host2VecTables, rivalsTables, 0, false);
    TaskEnrichQueries(client, config, rivalsTables);
    TaskGroupAutoCampaignQueries(client, host2VecTables, rivalsTables, acConfig);

    // Output schema for the enrichment below: the current schema of the enriched
    // groups table without sort order, plus the three columns added by the reducers.
    NYT::TTableSchema tableSchema = NYTUtils::GetTableSchema(client, config.TABLE_AUTOCAMPAIGN_ENRICHED_GROUPS);
    tableSchema = NYTUtils::DropSortOrder(tableSchema);
    tableSchema.AddColumn(NYT::TColumnSchema().Name(F_PARENT_REGION_ID).Type(NYT::VT_UINT64));
    tableSchema.AddColumn(NYT::TColumnSchema().Name(F_FILTERED_QUERY).Type(NYT::VT_STRING));
    tableSchema.AddColumn(NYT::TColumnSchema().Name(F_FILTERED_FLAGS).Type(NYT::VT_INT64));

    // All steps below rewrite the enriched groups table in place within one transaction.
    NYT::ITransactionPtr tx = client->StartTransaction();
    TOpRunner(tx)
        // Step 1: sort by region id for the parent-region reducer.
        .SortBy(F_REGION_ID)
        .Sort(config.TABLE_AUTOCAMPAIGN_ENRICHED_GROUPS)

        // Step 2: add ParentRegionId; the geobase file is attached to the job as a local file.
        .InputNode(config.TABLE_AUTOCAMPAIGN_ENRICHED_GROUPS)
        .OutputNode(NYT::TRichYPath(config.TABLE_AUTOCAMPAIGN_ENRICHED_GROUPS).Schema(tableSchema))
        .UseTmpfs()
        .LocalFile(TConfig::GEOBASE_FILE_LITE)
        .MemoryLimit(MEMORY_LIMIT_2GB)
        .ReduceBy(F_REGION_ID)
        .Reduce(new TReduceEnrichAutoCampaignQueriesWithParentRegionId(TConfig::GEOBASE_FILE_LITE, acConfig))

        // Step 3: sort by query for the filtered-queries reducer.
        .SortBy(F_QUERY)
        .Sort(config.TABLE_AUTOCAMPAIGN_ENRICHED_GROUPS)

        // Step 4: add the filtered query and its flags.
        .InputNode(config.TABLE_AUTOCAMPAIGN_ENRICHED_GROUPS)
        .OutputNode(config.TABLE_AUTOCAMPAIGN_ENRICHED_GROUPS)
        .ReduceBy(F_QUERY)
        .Reduce(new TReduceEnrichAutoCampaignQueriesWithFilteredQueries())

        // Step 5: final sort order of the table.
        .SortBy(NHost2Vec::F_GROUP, NHost2Vec::F_HOST, F_QUERY, F_FILTERED_QUERY, F_PARENT_REGION_ID, F_REGION_ID)
        .Sort(config.TABLE_AUTOCAMPAIGN_ENRICHED_GROUPS)
    ;
    tx->Commit();

    // Drop the previous export table on the main cluster, then transfer the fresh
    // enriched groups table from the queries cluster into its place.
    NYT::IClientPtr mainClient = NYT::CreateClient(config.MR_SERVER_HOST_MAIN);
    TOpRunner(mainClient).Drop(config.TABLE_AUTOCAMPAIGN_EXPORT_GROUPS);

    TTransferManager(config.GetYTToken()).PostTaskAndWait(
        config.MR_SERVER_HOST_QUERIES, config.TABLE_AUTOCAMPAIGN_ENRICHED_GROUPS,
        config.MR_SERVER_HOST_MAIN, config.TABLE_AUTOCAMPAIGN_EXPORT_GROUPS
    );

    // Report the total processing time of the task in minutes.
    MonitorAutocampaignQueriesPTime(config.MONITOR_PERFORMANCE_SUFFIX, timer.Get().Minutes());
} catch (yexception &e) {
    LOG_ERROR("unable to complete autocampaign queries task: %s", e.what());
}

} //namespace NAutoCampaign
} //namespace NWebmaster
