#include <util/generic/size_literals.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>
#include <robot/jupiter/protos/acceptance.pb.h>

#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/regex.h>
#include <wmconsole/version3/wmcutil/thread.h>
#include <wmconsole/version3/wmcutil/url.h>

#include "config.h"
#include "process_turbo_bans.h"
#include "field_names.h"

using namespace NJupiter;

namespace NWebmaster {
namespace NTurbo {

// Upper bound on how many banned-document samples the reducer attaches to a single output record.
const size_t MAX_BAN_SAMPLES = 100;

// Workaround for API pushes that slip through looking like rss feeds:
// when `feed` is a lowercase-hex UUID (8-4-4-4-12), `dflt` is returned in its place,
// otherwise `feed` itself is returned.
// The result is a reference to one of the two arguments, so it must not outlive them.
const TString& DefaultIfUUID(const TString &feed, const TString &dflt) {
    static const TRegularExpression UUID_PATTERN("^[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}$");
    // Output argument required by GetMatches; its contents are not used here.
    TVector<TString> captures;
    return UUID_PATTERN.GetMatches(feed, captures) ? dflt : feed;
}

// Map stage of the turbo bans pipeline.
//
// Rows coming from input table TABLE_BANS_IDX are ban rules: a rule is dropped when it lists
// sources and none of them is in POSSIBLE_SOURCES; its host is taken from the FIELD_MARKER url.
// Rows from any other input table are banned documents: a document is kept only when
// FIELD_STILL_BANNED is true and FIELD_SOURCE is in POSSIBLE_DOCS_SOURCES; its host is taken
// from the FIELD_DOCUMENT url.
// Every kept row is emitted with two extra fields: FIELD_TABLE (input table index) and
// FIELD_HOST (url host passed through NUtils::FixDomainPrefix).
struct TTurboBansMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    // Index of the input table that carries ban rules.
    const static int TABLE_BANS_IDX = 0;

    TTurboBansMapper() = default;

    void Do(TReader *input, TWriter *output) override {
        const static THashSet<TString> POSSIBLE_SOURCES = { "rss", "autoparser", "wmc_rss" };
        const static THashSet<TString> POSSIBLE_DOCS_SOURCES = { "rss", "yml"};

        for (; input->IsValid(); input->Next()) {
            THttpURL url;
            // Mutable copy: the row is extended with FIELD_TABLE / FIELD_HOST before being emitted.
            NYT::TNode row = input->GetRow();
            const auto tableIndex = input->GetTableIndex();

            if (tableIndex == TABLE_BANS_IDX) {
                // Bound by reference: the list is only read, so copying every node is unnecessary.
                // `row` is not modified while this reference is alive.
                const auto& sources = row[FIELD_SOURCES].AsList();
                // An empty source list does not filter the rule out.
                if (!sources.empty()) {
                    bool hasInterestingSources = false;
                    for (const NYT::TNode& source : sources) {
                        if (POSSIBLE_SOURCES.contains(source.AsString())) {
                            hasInterestingSources = true;
                            break;
                        }
                    }
                    // not interesting ban, skipping
                    if (!hasInterestingSources) {
                        continue;
                    }
                }

                // get host from marker
                NUtils::ParseUrl(url, row[FIELD_MARKER].AsString());
            } else {
                const TString source = row[FIELD_SOURCE].AsString();

                if (!row[FIELD_STILL_BANNED].AsBool() || !POSSIBLE_DOCS_SOURCES.contains(source)) {
                    continue;
                }
                // get host from document
                NUtils::ParseUrl(url, row[FIELD_DOCUMENT].AsString());
            }

            row(FIELD_TABLE, tableIndex);
            row(FIELD_HOST, NUtils::FixDomainPrefix(url.GetHost()));
            output->AddRow(row);
        }
    }
};
REGISTER_MAPPER(TTurboBansMapper)

// Reduce stage of the turbo bans pipeline.
//
// For one reduce key it merges the ban rule row (rows whose FIELD_TABLE equals
// TTurboBansMapper::TABLE_BANS_IDX) with the banned-document rows sharing that key and emits
// a single record holding the rule fields, up to MAX_BAN_SAMPLES document samples and
// per-source / per-feed document counters.
// Nothing is emitted when the key has no ban rule row, or when the rule's ban type is one of
// DOC_BAN_TYPES and there are no document samples.
struct TTurboBansReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    TTurboBansReducer() = default;

    void Do(TReader *input, TWriter *output) override {
        const static THashSet<TString> DOC_BAN_TYPES { "doc_domain", "doc_url" };

        // The output record; its domain comes from the first row of the key.
        NYT::TNode banRecord;
        banRecord(FIELD_DOMAIN, input->GetRow()[FIELD_HOST]);

        bool banRuleSeen = false;
        NYT::TNode sampleList = NYT::TNode::CreateList();
        // source -> feed -> number of banned documents
        THashMap<TString, THashMap<TString, ui32>> docCounters;

        while (input->IsValid()) {
            const NYT::TNode& row = input->GetRow();

            if (row[FIELD_TABLE].AsUint64() != TTurboBansMapper::TABLE_BANS_IDX) {
                // Banned document: count it and keep it as a sample while there is room.
                const TString rawFeed = NYTUtils::FromNodeOrDefault<TString>(row[FIELD_FEED], "");
                // UUID-looking feeds are replaced with an empty feed name.
                const TString feed = DefaultIfUUID(rawFeed, "");

                ui32& counter = docCounters[row[FIELD_SOURCE].AsString()][feed];
                ++counter;

                if (sampleList.Size() < MAX_BAN_SAMPLES) {
                    NYT::TNode sample;
                    sample(FIELD_DOCUMENT, row[FIELD_DOCUMENT]);
                    sample(FIELD_FEED, feed);
                    sampleList.Add(sample);
                }
            } else {
                // Ban rule: copy its fields into the record (a later rule row overwrites an earlier one).
                banRecord(FIELD_MARKER, row[FIELD_MARKER]);
                banRecord(FIELD_BAN_TYPE, row[FIELD_BAN_TYPE]);
                banRecord(FIELD_BAN_REASON, row[FIELD_BAN_REASON]);
                banRecord(FIELD_DESCRIPTION, row[FIELD_DESCRIPTION]);
                banRecord(FIELD_APPLY_TIMESTAMP, row[FIELD_APPLY_TIMESTAMP]);
                banRecord(FIELD_TIMESTAMP, row[FIELD_TIMESTAMP]);
                banRecord(FIELD_BAN_ERROR, NYTUtils::FromNodeOrDefault<TString>(row[FIELD_BAN_ERROR], ""));
                banRuleSeen = true;
            }

            input->Next();
        }

        // Documents without a matching ban rule produce no output.
        if (!banRuleSeen) {
            return;
        }

        // doc bans without samples - not needed
        if (DOC_BAN_TYPES.contains(banRecord[FIELD_BAN_TYPE].AsString()) && sampleList.Size() == 0) {
            return;
        }

        // Turn the nested counters into a list of { source, feeds: [ { url, count } ] } nodes.
        NYT::TNode stats = NYT::TNode::CreateList();
        for (const auto& perSource : docCounters) {
            const TString& sourceName = perSource.first;
            const auto& feedCounters = perSource.second;

            NYT::TNode feedStats = NYT::TNode::CreateList();
            for (const auto& feedCounter : feedCounters) {
                NYT::TNode entry;
                entry(FIELD_URL, feedCounter.first);
                entry(FIELD_COUNT, feedCounter.second);
                feedStats.Add(entry);
            }

            NYT::TNode sourceStats;
            sourceStats(FIELD_SOURCE, sourceName);
            sourceStats(FIELD_FEEDS, feedStats);
            stats.Add(sourceStats);
        }

        banRecord(FIELD_BAN_SAMPLES, sampleList);
        banRecord(FIELD_STATS, stats);
        output->AddRow(banRecord);
    }
};
REGISTER_REDUCER(TTurboBansReducer)

int ProcessTurboBans(int, const char **) {
    const auto& config = TConfig::CInstance();
    NYT::IClientPtr client = NYT::CreateClient(config.MR_TURBO_SERVER_HOST);
    NYT::ITransactionPtr tx = client->StartTransaction();

    NYT::TRichYPath output(config.TABLE_BANS_STATE);
    output.Schema(NYT::TTableSchema()
                          .AddColumn(FIELD_DOMAIN, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_APPLY_TIMESTAMP, NYT::EValueType::VT_INT64)
                          .AddColumn(FIELD_BAN_ERROR, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_BAN_REASON, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_BAN_TYPE, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_DESCRIPTION, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_MARKER, NYT::EValueType::VT_STRING)
                          .AddColumn(FIELD_TIMESTAMP, NYT::EValueType::VT_INT64)
                          .AddColumn(FIELD_STATS, NYT::EValueType::VT_ANY)
                          .AddColumn(FIELD_BAN_SAMPLES, NYT::EValueType::VT_ANY));

    TMapReduceCmd<TTurboBansMapper, TTurboBansReducer>(tx)
        .Input<NYT::TNode>(config.TABLE_SOURCE_BANS)
        .Input<NYT::TNode>(config.TABLE_SOURCE_BANNED_DOCS)
        .Output<NYT::TNode>(output)
        .ReduceBy({FIELD_HOST, FIELD_BAN_ID})
        .Do();

    TSortCmd<NYT::TNode>(tx)
        .Input<NYT::TNode>(output.Path_)
        .Output<NYT::TNode>(output.Path_)
        .By(FIELD_DOMAIN)
        .Do();

    tx->Commit();

    return 0;
}

} //namespace NTurbo
} //namespace NWebmaster

