#include <util/generic/set.h>
#include <util/generic/size_literals.h>
#include <util/string/cast.h>
#include <util/string/join.h>

#include <wmconsole/version3/wmcutil/datetime.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/regex.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/protos/urltree.pb.h>
#include <wmconsole/version3/processors/indexing/sitetree/protos/searchbase.pb.h>

#include "config.h"
#include "monitor.h"
#include "queries.h"
#include "week_config.h"

namespace NWebmaster {

// File-local names of the YT node attribute and table columns referenced by
// the jobs and operations in this file.
namespace {
// Attribute written on the produced sitetree table; holds the name of the
// input table the snapshot was built from.
const char *ATTR_SOURCE_TABLE           = "source_table";
// Columns of the digest tables (host and path identify a row).
const char *F_HOST                      = "Host";
const char *F_PATH                      = "Path";
// Columns of the key/value sitetree input read by TExtractSearchCountersMapper:
// the key is used as the host, the value is a serialized urltree HostInfo.
const char *F_YAMR_KEY                  = "key";
const char *F_YAMR_VALUE                = "value";
// Search history column read by TExtractSearchHistoryReducer (cast to EDiffStatus).
const char *F_DIFF_STATUS               = "DiffStatus";
// Per-host counter columns emitted by TExtractSearchCountersMapper.
const char *F_NUM_OF_DOCS               = "NumOfDocs";
const char *F_NUM_OF_DOCS_ON_SEARCH     = "NumOfDocsOnSearch";
const char *F_NUM_NEW_DOCS_ON_SEARCH    = "NewDocsOnSearch";
const char *F_NUM_GONE_DOCS_ON_SEARCH   = "GoneDocsOnSearch";
// Search history column read by TExtractSearchHistoryReducer as an int64 timestamp.
const char *F_TIMESTAMP                 = "Timestamp";
}

// Builds the rich YT path used as an operation input for the given table.
// Currently the path covers the whole table; uncomment one of the ranges
// below to restrict the input to a single host while debugging.
static NYT::TRichYPath DebugPath(const TString &table) {
    NYT::TRichYPath richPath(table);
    //richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://lenta.ru"))));
    //richPath.AddRange(NYT::TReadRange().Exact(NYT::TReadLimit().Key(NYT::TKey("https://webmaster.yandex.ru"))));
    return richPath;
}

// Mapper over key/value sitetree rows: the key is taken as the host and the
// value is parsed as a urltree HostInfo message. For each input row it emits
// at most one output row with the document counters of the first node that
// belongs to the PRODUCTION search source, the RU shard and has the name "/".
struct TExtractSearchCountersMapper: public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &inputRow = input->GetRow();
            const TString host = inputRow[F_YAMR_KEY].AsString();

            // The parse result is intentionally ignored; whatever was decoded is used as is.
            NWebmaster::proto::urltree::HostInfo hostInfo;
            Y_PROTOBUF_SUPPRESS_NODISCARD hostInfo.ParseFromString(inputRow[F_YAMR_VALUE].AsString());

            for (const auto &node : hostInfo.nodes()) {
                const bool isProductionRuRoot =
                    node.search_source_id() == proto::urltree::PRODUCTION
                    && node.shard_id() == proto::urltree::RU
                    && node.name() == "/";
                if (!isProductionRuRoot) {
                    continue;
                }

                const NYT::TNode counters = NYT::TNode()
                    (F_HOST, host)
                    (F_NUM_OF_DOCS, node.num_of_docs())
                    (F_NUM_OF_DOCS_ON_SEARCH, node.num_of_docs_on_search())
                    (F_NUM_NEW_DOCS_ON_SEARCH, node.num_of_new_search_docs())
                    (F_NUM_GONE_DOCS_ON_SEARCH, node.num_of_gone_search_docs())
                ;
                output->AddRow(counters);
                break; // only the first matching node is reported for a host
            }
        }
    }
};

REGISTER_MAPPER(TExtractSearchCountersMapper)

//ReduceBy Host
// Reducer over search history rows of a single host. It drops rows whose
// path has as many NEW as GONE records (the changes cancel out), keeps only
// rows whose timestamp is present in SearchBases with a mapped value that
// falls into WeekConfig.NewWeek, and emits at most MAX_SAMPLES of the
// remaining rows, starting from the greatest timestamp.
struct TExtractSearchHistoryReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(WeekConfig, SearchBases)
public:
    TExtractSearchHistoryReducer() = default;
    TExtractSearchHistoryReducer(const TDigestWeekConfig &weekConfig, const THashMap<time_t, time_t> &searchBases)
        : WeekConfig(weekConfig)
        , SearchBases(searchBases)
    {
    }

    void Do(TReader *input, TWriter *output) override {
        const size_t MAX_SAMPLES = 5;

        // Pass 1: buffer every row and compute the NEW/GONE balance per path.
        THashMap<TString, int> balanceByPath;
        TDeque<NYT::TNode> bufferedRows;
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &current = input->GetRow();
            const auto diffStatus = static_cast<NWebmaster::NProto::EDiffStatus>(current[F_DIFF_STATUS].AsInt64());
            const auto &urlPath = current[F_PATH].AsString();

            int delta = 0;
            if (diffStatus == NProto::EDiffStatus::NEW) {
                delta = 1;
            } else if (diffStatus == NProto::EDiffStatus::GONE) {
                delta = -1;
            }
            if (delta != 0) {
                balanceByPath[urlPath] += delta;
            }

            bufferedRows.push_back(current);
        }

        // Pass 2: collect the surviving rows ordered by timestamp.
        TMultiMap<time_t, NYT::TNode> byTimestamp;
        for (const auto &buffered : bufferedRows) {
            const time_t timestamp = buffered[F_TIMESTAMP].AsInt64();

            // NOTE(review): at() is expected to throw for a path that never had a
            // NEW or GONE record - confirm that other statuses cannot occur.
            const bool collapsed = balanceByPath.at(buffered[F_PATH].AsString()) == 0;
            if (collapsed) {
                continue;
            }

            const auto baseIt = SearchBases.find(timestamp);
            if (baseIt != SearchBases.end() && WeekConfig.NewWeek.In(baseIt->second)) {
                byTimestamp.emplace(timestamp, buffered);
            }
        }

        // Emit the most recent samples first, no more than MAX_SAMPLES of them.
        auto newest = byTimestamp.rbegin();
        for (size_t emitted = 0; newest != byTimestamp.rend() && emitted < MAX_SAMPLES; ++newest, ++emitted) {
            output->AddRow(newest->second);
        }
    }

public:
    TDigestWeekConfig WeekConfig;
    THashMap<time_t, time_t> SearchBases;
};

REGISTER_REDUCER(TExtractSearchHistoryReducer)

// Returns the path of the digest search history table for the new week of
// the given week configuration: the configured search history root joined
// with the week name.
TString GetSearchHistoryTableName(const TDigestWeekConfig &weekConfig) {
    return NYTUtils::JoinPath(
        TConfig::CInstance().TABLE_DIGEST_SOURCE_SEARCH_HISTORY,
        weekConfig.NewWeek.WeekName()
    );
}

// Takes a dated snapshot of the important urls source table: within a single
// transaction the source table is copied under the digest important urls root
// (named after the current date) and the copy is sorted by Host, Path.
void PrepareImportantUrlsSource(NYT::IClientBasePtr clientSearch) {
    const auto &cfg = TConfig::CInstance();
    const auto dateSuffix = date2str(Now().TimeT());
    const TString snapshotTable = NYTUtils::JoinPath(cfg.TABLE_DIGEST_SOURCE_SEARCH_IMPORTANT_URLS, dateSuffix);

    NYT::ITransactionPtr transaction = clientSearch->StartTransaction();

    TOpRunner(transaction)
        .Copy(cfg.TABLE_SOURCE_SEARCH_IMPORTANT_URLS, snapshotTable)
        .SortBy("Host", "Path")
        .Sort(snapshotTable)
    ;

    transaction->Commit();
}

// Builds the digest search history table for the week described by weekConfig.
// Does nothing (apart from logging) when the target table already exists.
// Otherwise, inside one transaction, it loads the collection_date -> switch_date
// mapping from the search bases table, reduces the search history by host with
// TExtractSearchHistoryReducer and sorts the result by host.
void PrepareSearchHistorySource(NYT::IClientBasePtr clientSearch, const TDigestWeekConfig &weekConfig) {
    const auto &cfg = TConfig::CInstance();
    const TString targetTable = GetSearchHistoryTableName(weekConfig);

    const bool alreadyProcessed = clientSearch->Exists(targetTable);
    if (alreadyProcessed) {
        LOG_INFO("source search history, table %s is already processed", targetTable.c_str());
        return;
    }

    NYT::ITransactionPtr transaction = clientSearch->StartTransaction();

    // Search base collection date -> date the base was switched on.
    THashMap<time_t, time_t> switchDateByCollectionDate;
    auto basesReader = transaction->CreateTableReader<NYT::TNode>(cfg.TABLE_SOURCE_SEARCH_BASES);
    while (basesReader->IsValid()) {
        const NYT::TNode &baseRow = basesReader->GetRow();
        switchDateByCollectionDate[baseRow["collection_date"].AsUint64()] = baseRow["switch_date"].AsUint64();
        basesReader->Next();
    }

    TOpRunner(transaction)
        .InputNode(DebugPath(cfg.TABLE_SOURCE_SEARCH_HISTORY))
        .OutputNode(NYT::TRichYPath(targetTable).SortedBy(F_HOST))
        .MemoryLimit(2_GBs)
        .ReduceBy(F_HOST)
        .Reduce(new TExtractSearchHistoryReducer(weekConfig, switchDateByCollectionDate))
        .SortBy(F_HOST)
        .Sort(targetTable)
    ;

    transaction->Commit();
}

// Builds a dated digest snapshot of per-host search counters from the most
// recent sitetree table.
//
// Candidate tables are those under the configured sitetree prefix whose name
// starts with "transmitted-" or "ready-" and ends with a numeric suffix; the
// one with the greatest suffix is used. Inside one transaction the table is
// mapped with TExtractSearchCountersMapper into a strictly-schematized table,
// sorted by host, and tagged with the name of the input table.
//
// Throws yexception when no sitetree tables are listed or none of them
// matches the naming pattern.
void PrepareSitetreeSource(NYT::IClientBasePtr clientSearch) {
    const auto &cfg = TConfig::CInstance();

    TDeque<NYTUtils::TTableInfo> listedTables;
    const auto listedCount = NYTUtils::GetTableList(clientSearch, cfg.TABLE_SOURCE_SEARCH_SITETREE_PREFIX, listedTables, Max<size_t>());
    if (listedCount == 0) {
        ythrow yexception() << "source search sitetree, there is no sitetree tables";
    }

    // Numeric table name suffix -> full table name, ordered by the suffix.
    TRegularExpression suffixRegex("\\w+-(\\d+)$");
    TMap<time_t, TString> candidates;
    for (const NYTUtils::TTableInfo &info : listedTables) {
        const TString shortName = NYTUtils::GetTableName(info.Name);
        if (!shortName.StartsWith("transmitted-") && !shortName.StartsWith("ready-")) {
            continue;
        }

        TVector<TString> captured;
        if (suffixRegex.GetMatches(shortName, captured) != 1) {
            continue;
        }
        candidates[FromString<time_t>(captured[0])] = info.Name;
    }

    if (candidates.empty()) {
        ythrow yexception() << "source search sitetree, there is no sitetree tables";
    }

    const TString snapshotTable = NYTUtils::JoinPath(cfg.TABLE_DIGEST_SOURCE_SEARCH_SITETREE, date2str(Now().TimeT()));
    // The map is ordered by suffix, so the last entry is the latest table.
    const TString latestInputTable = candidates.rbegin()->second;

    // Output schema: the host sort key followed by the int64 counters.
    NYT::TTableSchema snapshotSchema;
    snapshotSchema.Strict(true);
    snapshotSchema.AddColumn(NYT::TColumnSchema().Name(F_HOST).Type(NYT::VT_STRING).SortOrder(NYT::SO_ASCENDING));
    for (const char *counterColumn : {F_NUM_OF_DOCS, F_NUM_OF_DOCS_ON_SEARCH, F_NUM_NEW_DOCS_ON_SEARCH, F_NUM_GONE_DOCS_ON_SEARCH}) {
        snapshotSchema.AddColumn(NYT::TColumnSchema().Name(counterColumn).Type(NYT::VT_INT64));
    }

    NYT::ITransactionPtr transaction = clientSearch->StartTransaction();

    TOpRunner(transaction)
        .InputNode(DebugPath(latestInputTable))
        .OutputNode(NYT::TRichYPath(snapshotTable).Schema(snapshotSchema))
        .EnableOrderedMap()
        .Map(new TExtractSearchCountersMapper)
        .SortBy(F_HOST)
        .Sort(snapshotTable)
    ;

    // Remember which input table the snapshot was produced from.
    NYTUtils::SetAttr(transaction, snapshotTable, ATTR_SOURCE_TABLE, latestInputTable);
    transaction->Commit();
}

} //namespace NWebmaster
