#include <util/draft/date.h>
#include <util/random/random.h>
#include <util/string/join.h>
#include <util/string/printf.h>

#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/table.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/searchqueries-mr/conf/yt.h>
#include <wmconsole/version3/searchqueries-mr/protos/user_sessions.pb.h>
#include <wmconsole/version3/processors/acceptance/conf/config.h>
#include <wmconsole/version3/processors/acceptance/protos/acceptance.pb.h>
#include <wmconsole/version3/processors/indexing/robots/conf/config.h>
#include <wmconsole/version3/protos/exported.pb.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>

#include "common.h"
#include "task_robots.h"

namespace NWebmaster {
namespace NAcceptance {

using namespace NJupiter;

// Tags identifying the two inputs of TRobotsDiffReducer. AcceptRobots binds
// RobotsPrevInputTag to the previously accepted robots export table and
// RobotsNewInputTag to the freshly staged export table.
TInputTag<NYT::TYamr> RobotsPrevInputTag      (1);
TInputTag<NYT::TYamr> RobotsNewInputTag       (2);

// Tag of the single output of TRobotsDiffReducer; AcceptRobots binds it to the
// robots acceptance statistics table.
TOutputTag<NProto::TRobotsDiff> RobotsDiffOutputTag (1);

// Holds one change indicator per tracked robots.txt property, computed by
// comparing a "previous" RobotsTxtInfo against a "new" one (see Set()).
struct TRobotsDiffCounter {
    // ---- Property extractors ------------------------------------------------
    // Each functor maps a RobotsTxtInfo to a double so that GetDifference()
    // can treat every property uniformly. Boolean-like properties map to 0/1.

    struct THttpCodeGetter {
        double operator()(const proto::urltree::RobotsTxtInfo &info) const {
            const auto code = info.http_code();
            return static_cast<double>(code);
        }
    };

    struct TContentLengthGetter {
        double operator()(const proto::urltree::RobotsTxtInfo &info) const {
            const auto length = info.content().size();
            return static_cast<double>(length);
        }
    };

    struct TNumOfErrorsGetter {
        double operator()(const proto::urltree::RobotsTxtInfo &info) const {
            const auto count = info.errors_size();
            return static_cast<double>(count);
        }
    };

    struct TNumOfWarningsGetter {
        double operator()(const proto::urltree::RobotsTxtInfo &info) const {
            const auto count = info.warnings_size();
            return static_cast<double>(count);
        }
    };

    struct THostDirectiveLengthGetter {
        double operator()(const proto::urltree::RobotsTxtInfo &info) const {
            const auto length = info.parsed().host().size();
            return static_cast<double>(length);
        }
    };

    struct TNumOfCleanParamsGetter {
        double operator()(const proto::urltree::RobotsTxtInfo &info) const {
            const auto count = info.parsed().clean_params_size();
            return static_cast<double>(count);
        }
    };

    struct TNumOfSitemapsGetter {
        double operator()(const proto::urltree::RobotsTxtInfo &info) const {
            const auto count = info.parsed().sitemaps_size();
            return static_cast<double>(count);
        }
    };

    struct TDisallowAllGetter {
        double operator()(const proto::urltree::RobotsTxtInfo &info) const {
            if (info.parsed().is_disallow_all()) {
                return 1.0;
            }
            return 0.0;
        }
    };

    struct TNumOfAcceptedLinesGetter {
        double operator()(const proto::urltree::RobotsTxtInfo &info) const {
            const auto count = info.parsed().accepted_lines_size();
            return static_cast<double>(count);
        }
    };

    struct T2ndLevelHostGetter {
        // 1.0 when a second-level host string is present, 0.0 when it is empty.
        double operator()(const proto::urltree::RobotsTxtInfo &info) const {
            const bool hasHost = !info.parsed().host_2nd_level().empty();
            return hasHost ? 1.0 : 0.0;
        }
    };

    struct T2ndLevelHostInDbGetter {
        double operator()(const proto::urltree::RobotsTxtInfo &info) const {
            if (info.parsed().host_2nd_level_found_in_db()) {
                return 1.0;
            }
            return 0.0;
        }
    };

    // ---- Difference computation ---------------------------------------------

    // Returns the relative change of the property selected by TGetter when
    // going from `pr` (previous) to `nr` (new): (new - prev) / prev.
    // A zero previous value cannot serve as a denominator, so in that case the
    // result is 0.0 if the new value is also zero and 1.0 otherwise.
    template<class TGetter>
    static double GetDifference(const proto::urltree::RobotsTxtInfo &pr, const proto::urltree::RobotsTxtInfo &nr) {
        const TGetter extract{};
        const double before = extract(pr);
        const double after = extract(nr);

        if (before != 0) {
            return (after - before) / before;
        }
        return after == 0 ? 0.0 : 1.0;
    }

    // Fills every field of this counter from the pair (pr = previous, nr = new).
    // All fields are relative changes except HttpCode, which is a plain
    // changed/unchanged flag (1.0 / 0.0).
    void Set(const proto::urltree::RobotsTxtInfo &pr, const proto::urltree::RobotsTxtInfo &nr) {
        const bool sameHttpCode = (pr.http_code() == nr.http_code());
        HttpCode            = sameHttpCode ? 0.0 : 1.0;

        ContentLength       = GetDifference<TContentLengthGetter>(pr, nr);
        HostDirectiveLength = GetDifference<THostDirectiveLengthGetter>(pr, nr);
        DisallowAll         = GetDifference<TDisallowAllGetter>(pr, nr);

        Level2ndHost        = GetDifference<T2ndLevelHostGetter>(pr, nr);
        Level2ndHostInDb    = GetDifference<T2ndLevelHostInDbGetter>(pr, nr);

        NumOfAcceptedLines  = GetDifference<TNumOfAcceptedLinesGetter>(pr, nr);
        NumOfCleanParams    = GetDifference<TNumOfCleanParamsGetter>(pr, nr);
        NumOfSitemaps       = GetDifference<TNumOfSitemapsGetter>(pr, nr);
        NumOfErrors         = GetDifference<TNumOfErrorsGetter>(pr, nr);
        NumOfWarnings       = GetDifference<TNumOfWarningsGetter>(pr, nr);
    }

public:
    // Results of the last Set() call; all zero until Set() is invoked.
    double Level2ndHost        = 0;
    double Level2ndHostInDb    = 0;
    double ContentLength       = 0;
    double DisallowAll         = 0;
    double HostDirectiveLength = 0;
    double HttpCode            = 0;
    double NumOfAcceptedLines  = 0;
    double NumOfCleanParams    = 0;
    double NumOfErrors         = 0;
    double NumOfSitemaps       = 0;
    double NumOfWarnings       = 0;
};

struct TRobotsDiffReducer : public TTaggedReducer {
    TRobotsDiffReducer() = default;
    TRobotsDiffReducer(time_t timestamp)
        : Timestamp(timestamp)
    {
    }

    void Save(IOutputStream& stream) const override {
        ::Save(&stream, Timestamp);
        TTaggedReducer::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, Timestamp);
        TTaggedReducer::Load(stream);
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        const TMaybe<NYT::TYamr> mbPrev = reader.GetSingleRowMaybe(RobotsPrevInputTag);
        const TMaybe<NYT::TYamr> mbNew  = reader.GetSingleRowMaybe(RobotsNewInputTag);

        TString host;
        double diffCount = 0;

        TRobotsDiffCounter dc;
        proto::urltree::RobotsTxtInfo prevRow, newRow;

        if (mbPrev.Defined()) {
            Y_PROTOBUF_SUPPRESS_NODISCARD prevRow.ParseFromString(mbPrev.GetRef().GetValue());
            host = mbPrev.GetRef().GetKey();
            if (!mbNew.Defined()) {
                diffCount = -1;
            }
        } else {
            host = mbNew.GetRef().GetKey();
            diffCount = 1;
        }

        if (mbNew.Defined()) {
            Y_PROTOBUF_SUPPRESS_NODISCARD newRow.ParseFromString(mbNew.GetRef().GetValue());
        }

        dc.Set(prevRow, newRow);

        NProto::TRobotsDiff dstMsg;
        dstMsg.SetHost(host);
        dstMsg.SetDiffCount(diffCount);

        dstMsg.SetDiffLevel2ndHost(dc.Level2ndHost);
        dstMsg.SetDiffLevel2ndHostInDb(dc.Level2ndHostInDb);
        dstMsg.SetDiffContentLength(dc.ContentLength);
        dstMsg.SetDiffDisallowAll(dc.DisallowAll);
        dstMsg.SetDiffHostDirectiveLength(dc.HostDirectiveLength);
        dstMsg.SetDiffHttpCode(dc.HttpCode);
        dstMsg.SetDiffNumOfAcceptedLines(dc.NumOfAcceptedLines);
        dstMsg.SetDiffNumOfCleanParams(dc.NumOfCleanParams);
        dstMsg.SetDiffNumOfErrors(dc.NumOfErrors);
        dstMsg.SetDiffNumOfSitemaps(dc.NumOfSitemaps);
        dstMsg.SetDiffNumOfWarnings(dc.NumOfWarnings);

        dstMsg.SetTimestamp(Timestamp);
        writer.AddRow(dstMsg, RobotsDiffOutputTag);
    }

public:
    time_t Timestamp = 0;
};

REGISTER_REDUCER(TRobotsDiffReducer)

// Deduplicates and bounds the archived diff rows of one reduce group.
// Rows are indexed by Timestamp (a later row with the same timestamp replaces
// an earlier one) and at most 100 distinct timestamps are kept, dropping the
// smallest ones first. The cap can only take effect when a reduce group spans
// more than 100 distinct timestamps. Rows are emitted in ascending Timestamp
// order.
struct TRobotsArchiveLimitReducer : public NYT::IReducer<NYT::TTableReader<NProto::TRobotsDiff>, NYT::TTableWriter<NProto::TRobotsDiff>> {
    void Do(TReader *input, TWriter *output) override {
        const size_t maxRowsPerGroup = 100;

        TMap<time_t, NProto::TRobotsDiff> byTimestamp;
        while (input->IsValid()) {
            const auto &current = input->GetRow();
            byTimestamp[current.GetTimestamp()] = current;
            if (byTimestamp.size() > maxRowsPerGroup) {
                // The map is ordered by timestamp, so begin() is the oldest entry.
                byTimestamp.erase(byTimestamp.begin());
            }
            input->Next();
        }

        for (auto it = byTimestamp.begin(); it != byTimestamp.end(); ++it) {
            output->AddRow(it->second);
        }
    }
};

REGISTER_REDUCER(TRobotsArchiveLimitReducer)

// Reads every row of the robots acceptance statistics table, accumulates each
// diff metric via AddSample(), then checks every metric with
// IsThresholdBroken(). Throws yexception if at least one metric is rejected.
// AddSample / IsThresholdBroken are declared outside this file (presumably in
// common.h); the meaning of the 0.99 and 100.0 arguments is defined there.
void ValidateRobotsDiffs(NYT::IClientBasePtr client) {
    // Per-metric accumulator: the observed values plus a value -> host map
    // that AddSample() fills alongside them.
    struct TMetricSeries {
        TDeque<double> Values;
        TMap<double, TString> Samples;
    };

    const auto &cfg = TConfig::CInstance();

    TMetricSeries level2ndHost;
    TMetricSeries level2ndHostInDb;
    TMetricSeries contentLength;
    TMetricSeries disallowAll;
    TMetricSeries hostDirectiveLength;
    TMetricSeries httpCode;
    TMetricSeries numOfAcceptedLines;
    TMetricSeries numOfCleanParams;
    TMetricSeries numOfErrors;
    TMetricSeries numOfSitemaps;
    TMetricSeries numOfWarnings;

    auto reader = TTable<NProto::TRobotsDiff>(client, cfg.TABLE_ACCEPTANCE_ROBOTS_STATISTICS).GetReader();
    while (reader->IsValid()) {
        const auto &row = reader->GetRow();
        const auto &host = row.GetHost();

        AddSample(level2ndHost.Values,        level2ndHost.Samples,        row.GetDiffLevel2ndHost(),        host);
        AddSample(level2ndHostInDb.Values,    level2ndHostInDb.Samples,    row.GetDiffLevel2ndHostInDb(),    host);
        AddSample(contentLength.Values,       contentLength.Samples,       row.GetDiffContentLength(),       host);
        AddSample(disallowAll.Values,         disallowAll.Samples,         row.GetDiffDisallowAll(),         host);
        AddSample(hostDirectiveLength.Values, hostDirectiveLength.Samples, row.GetDiffHostDirectiveLength(), host);
        AddSample(httpCode.Values,            httpCode.Samples,            row.GetDiffHttpCode(),            host);
        AddSample(numOfAcceptedLines.Values,  numOfAcceptedLines.Samples,  row.GetDiffNumOfAcceptedLines(),  host);
        AddSample(numOfCleanParams.Values,    numOfCleanParams.Samples,    row.GetDiffNumOfCleanParams(),    host);
        AddSample(numOfErrors.Values,         numOfErrors.Samples,         row.GetDiffNumOfErrors(),         host);
        AddSample(numOfSitemaps.Values,       numOfSitemaps.Samples,       row.GetDiffNumOfSitemaps(),       host);
        AddSample(numOfWarnings.Values,       numOfWarnings.Samples,       row.GetDiffNumOfWarnings(),       host);

        reader->Next();
    }

    // Every metric is checked even after one has already failed, so that
    // IsThresholdBroken() runs for all of them.
    bool rejected = false;
    const auto check = [&rejected](const char *metricName, TMetricSeries &series) {
        rejected |= IsThresholdBroken(metricName, series.Values, series.Samples, 0.99, 100.0);
    };

    check("diffLevel2ndHost",        level2ndHost);
    check("diffLevel2ndHostInDb",    level2ndHostInDb);
    check("diffContentLength",       contentLength);
    check("diffDisallowAll",         disallowAll);
    check("diffHostDirectiveLength", hostDirectiveLength);
    check("diffHttpCode",            httpCode);
    check("diffNumOfAcceptedLines",  numOfAcceptedLines);
    check("diffNumOfCleanParams",    numOfCleanParams);
    check("diffNumOfErrors",         numOfErrors);
    check("diffNumOfSitemaps",       numOfSitemaps);
    check("diffNumOfWarnings",       numOfWarnings);

    if (rejected) {
        ythrow yexception() << "RobotsTxt is rejected";
    }
}

// Acceptance task for the robots.txt export.
//
// Steps, all inside one YT transaction:
//   1. Diff the previously accepted export against the staged export and write
//      per-host diff rows to the statistics table (TRobotsDiffReducer).
//   2. Merge the new statistics into the archive table, bounded per host
//      (TRobotsArchiveLimitReducer).
//   3. Record the processed source value on the statistics table and validate
//      the diffs; ValidateRobotsDiffs() throws if the export is rejected, in
//      which case the commit below is never reached.
//   4. Copy the staged export over the accepted one and commit.
//
// Returns 0 both when the work is done and when the current source value has
// already been processed. The command-line arguments are unused.
int AcceptRobots(int, const char **) {
    NYT::IClientPtr client = NYT::CreateClient(TCommonYTConfigSQ::CInstance().MR_SERVER_HOST_USER_SESSIONS);
    const auto &cfg = TConfig::CInstance();
    const auto &ccfg = TCommonYTConfig::CInstance();
    const auto &rtcfg = NRobotsTxt::TConfig::CInstance();

    NYTUtils::CreatePath(client, cfg.TABLE_ACCEPTANCE_ROBOTS_ROOT);
    TYtValueTrigger robotsTrigger(client, rtcfg.TABLE_EXPORT_ROBOTS, TYtTimeTrigger::AttrName);
    TYtValueTrigger statisticsTrigger(client, cfg.TABLE_ACCEPTANCE_ROBOTS_STATISTICS, TYtTimeTrigger::AttrName);

    // Skip the run when the statistics table is already stamped with the
    // current source value of the robots export.
    if (!statisticsTrigger.NeedUpdate(robotsTrigger.Source)) {
        LOG_INFO("robots, source %ld is already processed", statisticsTrigger.Source.AsInt64());
        return 0;
    }

    const TString stageInputTable = rtcfg.TABLE_EXPORT_ROBOTS;
    const TString acceptedInputTable = ccfg.GetAcceptedPath(rtcfg.TABLE_EXPORT_ROBOTS);

    NYT::ITransactionPtr tx = client->StartTransaction();

    LOG_INFO("robots, source %ld", robotsTrigger.Source.AsInt64());
    LOG_INFO("robots, input %s", acceptedInputTable.c_str());
    LOG_INFO("robots, input %s", stageInputTable.c_str());
    LOG_INFO("robots, output %s", cfg.TABLE_ACCEPTANCE_ROBOTS_STATISTICS.c_str());

    // Step 1: accepted (previous) vs staged (new) export, joined on the YAMR key.
    TReduceCmd<TRobotsDiffReducer>(tx, new TRobotsDiffReducer(robotsTrigger.Source.AsInt64()))
        .Input(TTable<NYT::TYamr>(tx, acceptedInputTable), RobotsPrevInputTag)
        .Input(TTable<NYT::TYamr>(tx, stageInputTable), RobotsNewInputTag)
        .Output(TTable<NProto::TRobotsDiff>(tx, cfg.TABLE_ACCEPTANCE_ROBOTS_STATISTICS)
            .AsSortedOutput({"Host", "Timestamp"}), RobotsDiffOutputTag
        )
        .ReduceBy({"key"})
        .Do()
    ;

    // First run: seed the archive from the fresh statistics so that the merge
    // below always has an archive input.
    if (!tx->Exists(cfg.TABLE_ACCEPTANCE_ROBOTS_ARCHIVE)) {
        tx->Copy(cfg.TABLE_ACCEPTANCE_ROBOTS_STATISTICS, cfg.TABLE_ACCEPTANCE_ROBOTS_ARCHIVE);
    }

    // Step 2: merge statistics into the archive.
    // Group by Host only. TRobotsArchiveLimitReducer keeps at most 100 distinct
    // timestamps per reduce group; grouping by (Host, Timestamp) would leave a
    // single timestamp in every group, so the cap would never trigger and the
    // archive would grow without bound. Input is still sorted by
    // (Host, Timestamp) and the reducer emits rows in ascending timestamp
    // order, so the declared sorted output remains valid.
    TReduceCmd<TRobotsArchiveLimitReducer>(tx)
        .Input(TTable<NProto::TRobotsDiff>(tx, cfg.TABLE_ACCEPTANCE_ROBOTS_ARCHIVE))
        .Input(TTable<NProto::TRobotsDiff>(tx, cfg.TABLE_ACCEPTANCE_ROBOTS_STATISTICS))
        .Output(TTable<NProto::TRobotsDiff>(tx, cfg.TABLE_ACCEPTANCE_ROBOTS_ARCHIVE)
            .AsSortedOutput({"Host", "Timestamp"})
        )
        .SortBy({"Host", "Timestamp"})
        .ReduceBy({"Host"})
        .Do()
    ;

    // Step 3: stamp the statistics table, then validate. A rejection throws
    // before Commit() is called.
    statisticsTrigger.Update(tx, robotsTrigger.Source);
    ValidateRobotsDiffs(tx);

    // Step 4: promote the staged export to "accepted", overwriting the old one.
    const auto opts = NYT::TCopyOptions().Force(true);
    tx->Copy(stageInputTable, acceptedInputTable, opts);
    tx->Commit();

    return 0;
}

} //namespace NAcceptance
} //namespace NWebmaster
