#include <util/generic/deque.h>
#include <util/generic/set.h>
#include <util/generic/hash_set.h>
#include <library/cpp/cgiparam/cgiparam.h>
#include <library/cpp/string_utils/url/url.h>
#include <util/thread/pool.h>

#include <kernel/hosts/owner/owner.h>
#include <mapreduce/yt/interface/client.h>

#include <wmconsole/version3/protos/queries2.pb.h>
#include <wmconsole/version3/junk/spam_hosts_ml/dataset/predict.h>
#include <wmconsole/version3/wmcutil/args.h>
#include <wmconsole/version3/wmcutil/hostid.h>
#include <wmconsole/version3/wmcutil/regex.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/thread.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include "config.h"

namespace {
/*
const char *F_PATH = "Path";
const char *F_QUERY = "Query";
const char *F_REGION_ID = "RegionId";
*/

// Keys of the YT row / map nodes read and written by the jobs in this file.
// Both the pointer and the pointee are const: these names are never meant to
// be re-pointed at runtime.

// Per-request and aggregated row columns.
const char *const F_HOST = "Host";
const char *const F_USER_ID = "UserId";
const char *const F_USER_IP = "UserIp";
const char *const F_OWNER = "Owner";
const char *const F_TIME_MS = "TimeMs";
const char *const F_REQUESTS = "Requests";

// Keys of the nested summary maps and of the parsed log rows.
const char *const F_HOSTS = "Hosts";
const char *const F_UIDS = "UIDs";
const char *const F_IPS = "IPs";
const char *const F_OWNERS = "Owners";
const char *const F_COUNT = "Count";
const char *const F_METHOD = "Method";
const char *const F_TIMESTAMP = "Timestamp";

// Keys of the per-method report map and of the spamness percentile map.
const char *const F_REPORT = "Report";
const char *const F_SPAMNESS = "Spamness";

// Key of the suspiciousness score in the ban-suggestion rows.
const char *const F_MAGNITUDE = "Magnitude";
}

namespace NWebmaster {

// Collects double samples and answers percentile queries over them.
//
// NOTE(review): samples are stored in a TSet; assuming it has std::set
// semantics, equal samples are kept only once, so percentiles are computed
// over the distinct values. Confirm that this is intended.
struct TPercentiles {
    // Records one sample.
    void Add(double value) {
        Values.insert(value);
    }

    // Returns the sample at the given percentile (expected range 0..100).
    // The rank is percentile/100 * N rounded up to a whole number; the result
    // is the rank-th smallest stored value. A rank below 1 yields the smallest
    // value and a rank above N yields the largest. Returns 0.0 when no samples
    // have been added.
    double Get(double percentile) const {
        if (Values.empty()) {
            return 0.0;
        }

        // Round a fractional rank up to the next whole rank.
        double rank = percentile / 100.0 * static_cast<double>(Values.size());
        if (floor(rank) != rank) {
            rank = floor(rank) + 1.0;
        }

        // Walk the ordered samples, remembering the last one visited until
        // `rank` elements have been seen.
        double el = 0;
        double pValue = *Values.begin();
        for (auto it = Values.begin(); it != Values.end() && el < rank; ++it, el += 1.0) {
            pValue = *it;
        }

        return pValue;
    }

public:
    // Ordered storage of the recorded samples.
    TSet<double> Values;
};

// Runs `clfPredictor` over every hostname key of `hosts` and gathers the
// returned scores into a TPercentiles. The counters stored as map values are
// not used. An empty `hosts` map yields an empty TPercentiles without calling
// the predictor.
TPercentiles GetHostnamesSpamness(const TClfPredictor &clfPredictor, const THashMap<TString, size_t> &hosts) {
    TPercentiles scores;

    if (!hosts.empty()) {
        TVector<TString> names;
        for (auto it = hosts.begin(); it != hosts.end(); ++it) {
            names.push_back(it->first);
        }

        const TVector<double> predicted = clfPredictor.Predict(names);
        for (size_t i = 0; i < predicted.size(); ++i) {
            scores.Add(predicted[i]);
        }
    }

    return scores;
}

struct TFrontLogRecord {
    TFrontLogRecord(const TString &message) {
        //static TRegularExpression regex("^\\[pid:\\d+\\] Resolved request <(.+)> \\[in .+ ms\\] for (.+)( ?{?.*}?)?$");
        //static TRegularExpression regex("^\\[pid:\\d+?\\] Resolved request <(.+?)> \\[in .+? ms\\].*? for (.+?)");
        static TRegularExpression regex("^\\[pid:\\d+\\] Resolved request <(.+?)> \\[in .+?\\~(\\d+) ms\\].*? for (.+)");

        TVector<TString> hits;
        const size_t hitsCount = regex.GetMatches(message, hits);

        if (hitsCount == 3) {
            //const TString &action = hits[0];
            TimeMs = FromString<size_t>(hits[1]);
            const TString &rest = hits[2];
            Url = TString{TStringBuf(rest).Before(' ')};

            if (GetHttpPrefixSize(Url) == 0) {
                Url = TString::Join("http://", Url);
            }

            NUri::TUri parsedUrl;
            if (parsedUrl.Parse(Url) != NUri::TState::ParsedOK) {
                ythrow yexception() << "unable to parse url: " << Url << Endl;
            }

            Host = TString{parsedUrl.GetField(NUri::TField::FieldHost)};
            Query = parsedUrl.GetField(NUri::TField::FieldQuery);
            Method = parsedUrl.GetField(NUri::TField::FieldPath);
            Params = TCgiParameters(Query);
        } else {
            ythrow yexception() << "unknown message: " << message;
        }
    }

public:
    TString Url;
    TString Host;
    TString Method;
    TString Query;
    size_t TimeMs;
    TCgiParameters Params;
};

// Mapper that picks "Resolved request" messages of the production webmaster
// frontend out of the raw log rows and splits them over three output tables:
//   0 - parsed requests addressed to webmaster3-viewer.search.yandex.net;
//   1 - matching messages whose URL points at another host (row copied as is);
//   2 - rows whose processing threw a yexception (row copied plus "Error").
// Rows that do not pass the filter are dropped.
struct TExtractLogsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(TReader *input, TWriter *output) override {
        const size_t PARSED_TABLE = 0;
        const size_t FOREIGN_TABLE = 1;
        const size_t NOT_PARSED_TABLE = 2;

        for (; input->IsValid(); input->Next()) {
            using namespace NYTUtils;
            const NYT::TNode &row = input->GetRow();
            const TString message = FromNodeOrDefault<TString>(row["message"], "");

            try {
                const bool isFrontendRequest =
                    FromNodeOrDefault<TString>(row["qloud_project"], "") == "webmaster"
                    && FromNodeOrDefault<TString>(row["qloud_application"], "") == "webmaster-www"
                    && FromNodeOrDefault<TString>(row["qloud_environment"], "") == "production"
                    && FromNodeOrDefault<TString>(row["loggerName"], "") == "stdout"
                    && message.Contains("Resolved request <");
                if (!isFrontendRequest) {
                    continue;
                }

                TFrontLogRecord record(message);
                if (record.Host != "webmaster3-viewer.search.yandex.net") {
                    output->AddRow(row, FOREIGN_TABLE);
                    continue;
                }

                // Every CGI parameter becomes a column; the fixed columns are
                // written afterwards.
                NYT::TNode parsedRow;
                for (const auto &param : record.Params) {
                    parsedRow(param.first, param.second);
                }
                parsedRow(F_METHOD, record.Method);
                parsedRow(F_TIMESTAMP, TInstant::ParseIso8601Deprecated(row["timestamp"].AsString()).MicroSeconds());
                parsedRow(F_TIME_MS, record.TimeMs);

                output->AddRow(parsedRow, PARSED_TABLE);
            } catch (const yexception &e) {
                // Keep the offending row together with the failure reason.
                NYT::TNode failedRow = row;
                failedRow("Error", e.what());
                output->AddRow(failedRow, NOT_PARSED_TABLE);
            }
        }
    }
};

REGISTER_MAPPER(TExtractLogsMapper)

// Mapper that keeps only rows carrying a "userId" and reshapes them into
// (Method, TimeMs, UserId, Host, UserIp, Timestamp) rows.
// Host is the host name derived from "hostId"; it is left empty when "hostId"
// is missing, empty, or parses as a number.
struct TExtractUserIdMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            if (NYTUtils::IsNodeNull(row["userId"])) {
                continue;
            }

            const size_t userId = FromString<size_t>(row["userId"].AsString());
            TString rawHostId = NYTUtils::FromNodeOrDefault<TString>(row["hostId"], "");

            // Only non-empty, non-numeric host ids are converted to a host name.
            TString hostName;
            long numericHostId = 0;
            if (!rawHostId.empty() && !TryFromString(rawHostId, numericHostId)) {
                hostName = TWebmasterHostId::FromHostId(rawHostId).ToHostName();
            }

            NYT::TNode dstRow;
            dstRow(F_METHOD, row[F_METHOD]);
            dstRow(F_TIME_MS, row[F_TIME_MS]);
            dstRow(F_USER_ID, userId);
            dstRow(F_HOST, hostName);
            dstRow(F_USER_IP, row["userIp"]);
            dstRow(F_TIMESTAMP, row[F_TIMESTAMP]);
            output->AddRow(dstRow);
        }
    }
};

REGISTER_MAPPER(TExtractUserIdMapper)

// Reducer that folds all request rows of one reduce group into one summary row.
// The user id is taken from the first row of the group (presumably the job
// reduces by F_USER_ID - confirm against the operation spec).
//
// Output row:
//   UserId, Requests, TimeMs - totals over the group;
//   Report   - map: method -> {Requests, TimeMs, Hosts, Owners, IPs}, where the
//              last three are distinct counts for that method;
//   Hosts    - {Count, Hosts: host -> requests}, inner map is null if no hosts;
//   IPs      - {Count, IPs: ip -> requests};
//   Owners   - {Count, Owners: owner -> requests}, inner map is null if no hosts;
//   Spamness - map "p<percentile>" -> hostname spamness percentile.
struct TExtractUserIdReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    // Loads the owner canonizer data before rows are processed.
    void Start(TWriter *) override {
        OwnerCanonizer.LoadTrueOwners();
        OwnerCanonizer.LoadSerpCategOwners();
    }

    void Do(TReader *input, TWriter *output) override {
        // Accumulator for the rows of one method.
        struct TRecord {
            void Add(const TOwnerCanonizer &ownerCanonizer, const NYT::TNode &row) {
                // Rows with an empty host contribute to the IP and load
                // counters only.
                const TString &host = row[F_HOST].AsString();
                if (!host.empty()) {
                    Hosts[host]++;
                    Owners[ownerCanonizer.GetHostOwner(host)]++;
                }

                IPs[row[F_USER_IP].AsString()]++;
                Requests++;
                TimeMs += row[F_TIME_MS].AsUint64();
            }

        public:
            THashMap<TString, size_t> IPs;
            THashMap<TString, size_t> Hosts;
            THashMap<TString, size_t> Owners;
            size_t Requests = 0;
            size_t TimeMs = 0;
        };

        size_t requests = 0;
        size_t timeMs = 0;
        const size_t userId = input->GetRow()[F_USER_ID].AsUint64();
        THashMap<TString, TRecord> records;

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            const TString &method = row[F_METHOD].AsString();
            records[method].Add(OwnerCanonizer, row);
            requests++;
            timeMs += row[F_TIME_MS].AsUint64();
        }

        NYT::TNode dstRow, reportRow = NYT::TNode::CreateMap();

        // Totals over all methods.
        THashMap<TString, size_t> ips;
        THashMap<TString, size_t> hosts;
        THashMap<TString, size_t> owners;

        for (const auto &obj : records) {
            const TRecord &record = obj.second;
            NYT::TNode methodRow;

            for (const auto &hostObj : record.Hosts) {
                hosts[hostObj.first] += hostObj.second;
            }
            for (const auto &ipObj : record.IPs) {
                ips[ipObj.first] += ipObj.second;
            }
            for (const auto &ownerObj : record.Owners) {
                owners[ownerObj.first] += ownerObj.second;
            }

            methodRow(F_REQUESTS, record.Requests);
            methodRow(F_TIME_MS, record.TimeMs);
            methodRow(F_HOSTS, record.Hosts.size());
            methodRow(F_OWNERS, record.Owners.size());
            methodRow(F_IPS, record.IPs.size());
            reportRow(obj.first, methodRow);
        }

        NYT::TNode hostsRow;
        NYT::TNode ipsRow;
        NYT::TNode ownersRow;
        NYT::TNode spamnessRow = NYT::TNode::CreateMap();

        // The totals are only read here. The previous code also added every
        // counter to itself while iterating the same map, which doubled the
        // in-memory counts after they had been emitted; that had no effect on
        // the output and has been removed.
        if (hosts.empty()) {
            hostsRow = NYT::TNode::CreateEntity();
            ownersRow = NYT::TNode::CreateEntity();
        } else {
            for (const auto &hostObj : hosts) {
                hostsRow(hostObj.first, hostObj.second);
            }
            for (const auto &ownerObj : owners) {
                ownersRow(ownerObj.first, ownerObj.second);
            }
        }
        for (const auto &ipObj : ips) {
            ipsRow(ipObj.first, ipObj.second);
        }

        // Only the host names (map keys) take part in the spamness prediction.
        TPercentiles spamnessPercentiles = GetHostnamesSpamness(ClfPredictor, hosts);
        static const TDeque<double> PERCENTILES = {10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 97.5, 100};
        for (double p : PERCENTILES) {
            spamnessRow["p" + ToString(p)] = spamnessPercentiles.Get(p);
        }

        output->AddRow(dstRow
            (F_USER_ID, userId)
            (F_REQUESTS, requests)
            (F_TIME_MS, timeMs)
            (F_REPORT, reportRow)
            (F_HOSTS, NYT::TNode()
                (F_COUNT, hosts.size())
                (F_HOSTS, hostsRow)
            )
            (F_IPS, NYT::TNode()
                (F_COUNT, ips.size())
                (F_IPS, ipsRow)
            )
            (F_OWNERS, NYT::TNode()
                (F_COUNT, owners.size())
                (F_OWNERS, ownersRow)
            )
            (F_SPAMNESS, spamnessRow)
        );
    }

public:
    TClfPredictor ClfPredictor;
    TOwnerCanonizer OwnerCanonizer;
};

REGISTER_REDUCER(TExtractUserIdReducer)

// Mapper that keeps rows having both "hostId" and "userIp", resolves the host
// name and its owner, and emits
// (Method, TimeMs, UserId, Host, Owner, UserIp, Timestamp) rows.
// Rows whose "hostId" is empty or parses as a number are dropped. A missing
// "userId" is written as 0.
struct TExtractOwnerMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    // Loads the owner canonizer data before rows are processed.
    void Start(TWriter *) override {
        OwnerCanonizer.LoadTrueOwners();
        OwnerCanonizer.LoadSerpCategOwners();
    }

    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            if (NYTUtils::IsNodeNull(row["hostId"]) || NYTUtils::IsNodeNull(row["userIp"])) {
                continue;
            }

            const TString userIdText = NYTUtils::FromNodeOrDefault<TString>(row["userId"], "0");
            const size_t userId = FromString<size_t>(userIdText);

            TString hostId = row["hostId"].AsString();
            long numericHostId = 0;
            if (hostId.empty() || TryFromString(hostId, numericHostId)) {
                continue;
            }

            const TString hostName = TWebmasterHostId::FromHostId(hostId).ToHostName();
            const TString owner = TString{OwnerCanonizer.GetHostOwner(hostName)};

            NYT::TNode dstRow;
            dstRow(F_METHOD, row[F_METHOD]);
            dstRow(F_TIME_MS, row[F_TIME_MS]);
            dstRow(F_USER_ID, userId);
            dstRow(F_HOST, hostName);
            dstRow(F_OWNER, owner);
            dstRow(F_USER_IP, row["userIp"]);
            dstRow(F_TIMESTAMP, row[F_TIMESTAMP]);
            output->AddRow(dstRow);
        }
    }

public:
    TOwnerCanonizer OwnerCanonizer;
};

REGISTER_MAPPER(TExtractOwnerMapper)

// Reducer that folds all request rows of one reduce group into one summary row.
// The owner is taken from the first row of the group (presumably the job
// reduces by F_OWNER - confirm against the operation spec).
//
// Output row:
//   Owner, Requests, TimeMs - totals over the group;
//   Report   - map: method -> {Requests, TimeMs, Hosts, UIDs, IPs}, where the
//              last three are distinct counts for that method;
//   Hosts    - {Count, Hosts: host -> requests};
//   IPs      - {Count, IPs: ip -> requests};
//   UIDs     - {Count, UIDs: "uid_<id>" -> requests}, inner map is null when
//              no non-zero user id was seen;
//   Spamness - map "p<percentile>" -> hostname spamness percentile.
struct TExtractOwnerReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(TReader *input, TWriter *output) override {
        // Accumulator for the rows of one method.
        struct TRecord {
            void Add(const NYT::TNode &row) {
                Hosts[row[F_HOST].AsString()]++;
                IPs[row[F_USER_IP].AsString()]++;
                // User id 0 is skipped when counting users.
                const size_t userId = row[F_USER_ID].AsUint64();
                if (userId != 0) {
                    UIDs[userId]++;
                }
                Requests++;
                TimeMs += row[F_TIME_MS].AsUint64();
            }

        public:
            THashMap<TString, size_t> IPs;
            THashMap<size_t, size_t> UIDs;
            THashMap<TString, size_t> Hosts;
            size_t Requests = 0;
            size_t TimeMs = 0;
        };

        size_t requests = 0;
        size_t timeMs = 0;
        const TString owner = input->GetRow()[F_OWNER].AsString();
        THashMap<TString, TRecord> records;

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            const TString &method = row[F_METHOD].AsString();
            records[method].Add(row);
            requests++;
            timeMs += row[F_TIME_MS].AsUint64();
        }

        NYT::TNode dstRow, reportRow = NYT::TNode::CreateMap();

        // Totals over all methods.
        THashMap<TString, size_t> ips;
        THashMap<TString, size_t> hosts;
        THashMap<size_t, size_t> uids;

        for (const auto &obj : records) {
            const TRecord &record = obj.second;
            NYT::TNode methodRow;

            for (const auto &hostObj : record.Hosts) {
                hosts[hostObj.first] += hostObj.second;
            }
            for (const auto &ipObj : record.IPs) {
                ips[ipObj.first] += ipObj.second;
            }
            for (const auto &uidObj : record.UIDs) {
                uids[uidObj.first] += uidObj.second;
            }

            methodRow(F_REQUESTS, record.Requests);
            methodRow(F_TIME_MS, record.TimeMs);
            methodRow(F_HOSTS, record.Hosts.size());
            methodRow(F_UIDS, record.UIDs.size());
            methodRow(F_IPS, record.IPs.size());
            reportRow(obj.first, methodRow);
        }

        NYT::TNode hostsRow;
        NYT::TNode ipsRow;
        NYT::TNode uidsRow;
        NYT::TNode spamnessRow = NYT::TNode::CreateMap();

        // The totals are only read here. The previous code also added every
        // counter to itself while iterating the same map, which doubled the
        // in-memory counts after they had been emitted; that had no effect on
        // the output and has been removed.
        for (const auto &hostObj : hosts) {
            hostsRow(hostObj.first, hostObj.second);
        }

        for (const auto &ipObj : ips) {
            ipsRow(ipObj.first, ipObj.second);
        }

        for (const auto &uidObj : uids) {
            uidsRow("uid_" + ToString(uidObj.first), uidObj.second);
        }

        if (uids.empty()) {
            uidsRow = NYT::TNode::CreateEntity();
        }

        // Only the host names (map keys) take part in the spamness prediction.
        TPercentiles spamnessPercentiles = GetHostnamesSpamness(ClfPredictor, hosts);
        static const TDeque<double> PERCENTILES = {10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 97.5, 100};
        for (double p : PERCENTILES) {
            spamnessRow["p" + ToString(p)] = spamnessPercentiles.Get(p);
        }

        output->AddRow(dstRow
            (F_OWNER, owner)
            (F_REQUESTS, requests)
            (F_TIME_MS, timeMs)
            (F_REPORT, reportRow)
            (F_HOSTS, NYT::TNode()
                (F_COUNT, hosts.size())
                (F_HOSTS, hostsRow)
            )
            (F_IPS, NYT::TNode()
                (F_COUNT, ips.size())
                (F_IPS, ipsRow)
            )
            (F_UIDS, NYT::TNode()
                (F_COUNT, uids.size())
                (F_UIDS, uidsRow)
            )
            (F_SPAMNESS, spamnessRow)
        );
    }

public:
    TClfPredictor ClfPredictor;
};

REGISTER_REDUCER(TExtractOwnerReducer)

// Mapper that copies every input row and adds an "EmptyKey" column holding an
// empty string to it.
struct TEvalReportMetricsMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            NYT::TNode keyedRow = input->GetRow();
            keyedRow("EmptyKey", "");
            output->AddRow(keyedRow);
        }
    }
};

REGISTER_MAPPER(TEvalReportMetricsMapper)

// Reducer that computes the mean, median and standard deviation of the
// Requests and TimeMs columns over all rows of its reduce group and writes
// them as a single row.
struct TEvalReportMetricsReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    // Arithmetic mean of `src`.
    static double Mean(const TDeque<double> &src) {
        double total = 0;
        for (auto it = src.begin(); it != src.end(); ++it) {
            total += *it;
        }
        return total / double(src.size());
    }

    // Standard deviation of `src` around `mean`, dividing by the element
    // count (not by count - 1).
    static double StdDev(double mean, const TDeque<double> &src) {
        double squaredSum = 0;
        for (auto it = src.begin(); it != src.end(); ++it) {
            const double delta = *it - mean;
            squaredSum += (delta * delta);
        }
        return sqrt(squaredSum / double(src.size()));
    }

    void Do(TReader *input, TWriter *output) override {
        TDeque<double> requestSamples;
        TDeque<double> timeSamples;

        for (; input->IsValid(); input->Next()) {
            const NYT::TNode &row = input->GetRow();
            requestSamples.push_back(row[F_REQUESTS].AsUint64());
            timeSamples.push_back(row[F_TIME_MS].AsUint64());
        }

        // Sorting is needed for the medians; the means and deviations are
        // computed over the sorted sequences as well.
        std::sort(requestSamples.begin(), requestSamples.end());
        std::sort(timeSamples.begin(), timeSamples.end());

        const double meanTimes = Mean(timeSamples);
        const double meanRequests = Mean(requestSamples);
        // The median is the element at index size / 2 of the sorted samples.
        const double medianRequests = requestSamples[requestSamples.size() / 2];
        const double medianTimes = timeSamples[timeSamples.size() / 2];
        const double stdDevRequests = StdDev(meanRequests, requestSamples);
        const double stdDevTimes = StdDev(meanTimes, timeSamples);

        NYT::TNode metricsRow;
        metricsRow("MeanRequests", meanRequests);
        metricsRow("MeanTimes", meanTimes);
        metricsRow("MedianRequests", medianRequests);
        metricsRow("MedianTimes", medianTimes);
        metricsRow("StdDevRequests", stdDevRequests);
        metricsRow("StdDevTimes", stdDevTimes);
        output->AddRow(metricsRow);
    }
};

REGISTER_REDUCER(TEvalReportMetricsReducer)

// Mapper that copies every input row and adds a "TableNo" column holding the
// index of the input table the row was read from.
struct TSuggestBanMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    void Do(TReader *input, TWriter *output) override {
        for (; input->IsValid(); input->Next()) {
            NYT::TNode taggedRow = input->GetRow();
            const ui32 tableNo = static_cast<ui32>(input->GetTableIndex());
            taggedRow("TableNo", tableNo);
            output->AddRow(taggedRow);
        }
    }
};

REGISTER_MAPPER(TSuggestBanMapper)

// Aggregate load statistics of one report table together with that table's
// timestamp. The ban-suggestion reducer in this file looks these up by input
// table index and compares each row's Requests / TimeMs against them.
// NOTE(review): the field names match the columns written by
// TEvalReportMetricsReducer, so the values presumably come from its output -
// confirm at the place where the config is filled.
struct TReportTableConfig {
    TReportTableConfig() = default;
    // Stores the given values in the fields of the same name.
    TReportTableConfig(time_t timestamp, double meanRequests, double meanTimes,
        double medianRequests, double medianTimes, double stdDevRequests, double stdDevTimes)
        : Timestamp(timestamp)
        , MeanRequests(meanRequests)
        , MeanTimes(meanTimes)
        , MedianRequests(medianRequests)
        , MedianTimes(medianTimes)
        , StdDevRequests(stdDevRequests)
        , StdDevTimes(stdDevTimes)
    {
    }

public:
    time_t Timestamp = 0;         // timestamp of the report table; used as the per-report key
    double MeanRequests = 0.0;    // mean of the Requests column
    double MeanTimes = 0.0;       // mean of the TimeMs column
    double MedianRequests = 0.0;  // median of the Requests column
    double MedianTimes = 0.0;     // median of the TimeMs column
    double StdDevRequests = 0.0;  // standard deviation of the Requests column
    double StdDevTimes = 0.0;     // standard deviation of the TimeMs column

public:
    // Serialization of all fields; needed because the struct is part of the
    // job state saved via Y_SAVELOAD_JOB in the reducers that hold it.
    Y_SAVELOAD_DEFINE(
        Timestamp,
        MeanRequests,
        MeanTimes,
        MedianRequests,
        MedianTimes,
        StdDevRequests,
        StdDevTimes
    )
};

// Reducer that evaluates the per-user summary rows coming from several report
// tables and emits one ban-suggestion row for the user if at least one report
// looks suspicious. The user id is taken from the first row of the group
// (presumably the job reduces by F_USER_ID - confirm against the operation
// spec). Each row must carry the "TableNo" column; TableConfig maps that index
// to the statistics of the corresponding report table.
// Throws yexception when a row refers to a table index missing in TableConfig.
struct TSuggestUidBanReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(TableConfig)

public:
    TSuggestUidBanReducer() = default;
    TSuggestUidBanReducer(const THashMap<ui32, TReportTableConfig> &tableConfig)
        : TableConfig(tableConfig)
    {
    }

    void Do(TReader *input, TWriter *output) override {
        // Evaluation of one report (one summary row) of the user.
        struct TRecord {
            // Fills the record from a summary row and scores it against the
            // statistics of the table the row came from.
            // NOTE(review): Magnitude and the host/owner/IP sets accumulate if
            // Set() is called twice on the same record (two tables sharing one
            // Timestamp), while Requests/TimeMs are overwritten - confirm that
            // timestamps are unique per table.
            void Set(const NYT::TNode &row, const TReportTableConfig &config) {
                // RPS is computed over a fixed 300 second window.
                const size_t STREAM_5M_SECONDS = 300;
                const size_t hostsCount = row[F_HOSTS][F_COUNT].AsUint64();
                const size_t ipsCount = row[F_IPS][F_COUNT].AsUint64();
                const size_t ownersCount = row[F_OWNERS][F_COUNT].AsUint64();

                Requests = row[F_REQUESTS].AsUint64();
                TimeMs = row[F_TIME_MS].AsUint64();
                RPS = static_cast<double>(Requests) / static_cast<double>(STREAM_5M_SECONDS);

                // Distance above the table mean, in standard deviations.
                // Values at or below the mean keep a distance of 0.
                double requestsSigmaDistance = 0;
                double timesSigmaDistance = 0;

                if (Requests > config.MeanRequests) {
                    requestsSigmaDistance = (Requests - config.MeanRequests) / config.StdDevRequests;
                }

                if (TimeMs > config.MeanTimes) {
                    timesSigmaDistance = (TimeMs - config.MeanTimes) / config.StdDevTimes;
                }

                // The nested maps may be null; only their keys are collected.
                if (!NYTUtils::IsNodeNull(row[F_HOSTS][F_HOSTS])) {
                    for (const auto &obj : row[F_HOSTS][F_HOSTS].AsMap()) {
                        Hosts.insert(obj.first);
                    }
                }

                if (!NYTUtils::IsNodeNull(row[F_OWNERS][F_OWNERS])) {
                    for (const auto &obj : row[F_OWNERS][F_OWNERS].AsMap()) {
                        Owners.insert(obj.first);
                    }
                }

                if (!NYTUtils::IsNodeNull(row[F_IPS][F_IPS])) {
                    for (const auto &obj : row[F_IPS][F_IPS].AsMap()) {
                        IPs.insert(obj.first);
                    }
                }

                SuspiciousSpamness30p = row[F_SPAMNESS]["p30"].AsDouble() > 0.5;
                SuspiciousRequests = requestsSigmaDistance > 3.0;
                SuspiciousTimes = timesSigmaDistance > 3.0;
                SuspiciousIPs = ipsCount > 3;
                SuspiciousRPS = RPS >= 1.0;
                // Load is suspicious when either the request count or the
                // total time is an outlier. The previous code OR-ed
                // SuspiciousRequests with itself, so the time outlier was
                // never taken into account here.
                const bool suspiciousLoad = SuspiciousRequests || SuspiciousTimes;
                SuspiciousHosts = hostsCount > 3 && suspiciousLoad;
                SuspiciousOwners = ownersCount > 3 && suspiciousLoad;

                // Magnitude is the number of raised flags.
                Magnitude += int(SuspiciousRequests);
                Magnitude += int(SuspiciousTimes);
                Magnitude += int(SuspiciousHosts);
                Magnitude += int(SuspiciousIPs);
                Magnitude += int(SuspiciousOwners);
                Magnitude += int(SuspiciousSpamness30p);
                Magnitude += int(SuspiciousRPS);

                // Spamness alone is not enough to mark a report suspicious.
                if (Magnitude == 1 && SuspiciousSpamness30p) {
                    Magnitude = 0;
                    SuspiciousSpamness30p = false;
                }
            }

            bool GetSuspicious() const {
                return Magnitude > 0;
            }

        public:
            size_t Magnitude = 0;
            bool SuspiciousRequests = false;
            bool SuspiciousTimes = false;
            bool SuspiciousHosts = false;
            bool SuspiciousIPs = false;
            bool SuspiciousOwners = false;
            bool SuspiciousSpamness30p = false;
            bool SuspiciousRPS = false;
            THashSet<TString> IPs;
            THashSet<TString> Hosts;
            THashSet<TString> Owners;
            size_t TimeMs = 0;
            size_t Requests = 0;
            double RPS = 0;
        };

        // Evaluations keyed by the timestamp of the report table.
        THashMap<time_t, TRecord> evaluations;
        bool suspicious = false;
        size_t requests = 0;
        size_t timeMs = 0;
        double maxRPS = 0;
        const size_t userId = input->GetRow()[F_USER_ID].AsUint64();
        for (; input->IsValid(); input->Next()) {
            // The row is only read within this iteration, so no copy is needed.
            const NYT::TNode &row = input->GetRow();
            const ui32 tableNo = row["TableNo"].AsUint64();

            const auto configIt = TableConfig.find(tableNo);
            if (configIt == TableConfig.end()) {
                ythrow yexception() << "unknown table index " << tableNo;
            }

            const TReportTableConfig &config = configIt->second;
            TRecord &evaluation = evaluations[config.Timestamp];
            evaluation.Set(row, config);
            suspicious |= evaluation.GetSuspicious();
            requests += evaluation.Requests;
            timeMs += evaluation.TimeMs;
            maxRPS = std::max(maxRPS, evaluation.RPS);
        }

        // Users without any suspicious report produce no output.
        if (suspicious) {
            // Ordered sets: hosts/owners/IPs seen in suspicious reports only.
            TSet<TString> hosts;
            TSet<TString> owners;
            TSet<TString> ips;

            NYT::TNode hostsRow = NYT::TNode::CreateList();
            NYT::TNode ownersRow = NYT::TNode::CreateList();
            NYT::TNode ipsRow = NYT::TNode::CreateList();
            NYT::TNode reportsRow = NYT::TNode::CreateList();

            size_t suspiciousReports = 0;
            size_t magnitude = 0;
            for (const auto &obj : evaluations) {
                const time_t timestamp = obj.first;
                const TRecord &evaluation = obj.second;

                if (evaluation.GetSuspicious()) {
                    hosts.insert(evaluation.Hosts.begin(), evaluation.Hosts.end());
                    ips.insert(evaluation.IPs.begin(), evaluation.IPs.end());
                    owners.insert(evaluation.Owners.begin(), evaluation.Owners.end());
                    reportsRow.Add(NYT::TNode()
                        (F_TIMESTAMP, timestamp)
                        (F_MAGNITUDE, evaluation.Magnitude)
                        ("Suspicious", NYT::TNode()
                            ("RequestsGt3S", evaluation.SuspiciousRequests)
                            ("TimeMsGt3S", evaluation.SuspiciousTimes)
                            ("HostsGt3", evaluation.SuspiciousHosts)
                            ("IPsGt3", evaluation.SuspiciousIPs)
                            ("OwnersGt3", evaluation.SuspiciousOwners)
                            ("Spamness30p", evaluation.SuspiciousSpamness30p)
                            ("RPSGt1", evaluation.SuspiciousRPS)
                            (F_REQUESTS, evaluation.Requests)
                            (F_TIME_MS, evaluation.TimeMs)
                            ("RPS", evaluation.RPS)
                        )
                    );
                    suspiciousReports++;
                    magnitude += evaluation.Magnitude;
                } else {
                    // Clean reports are listed too, with a null details node.
                    reportsRow.Add(NYT::TNode()
                        (F_TIMESTAMP, timestamp)
                        (F_MAGNITUDE, evaluation.Magnitude)
                        ("Suspicious", NYT::TNode::CreateEntity())
                    );
                }
            }

            for (const TString &obj : hosts) {
                hostsRow.Add(obj);
            }

            for (const TString &obj : ips) {
                ipsRow.Add(obj);
            }

            for (const TString &obj : owners) {
                ownersRow.Add(obj);
            }

            // The "Suspicous*" column names are misspelled but are left as is:
            // they are part of the output schema and readers may rely on them.
            output->AddRow(NYT::TNode()
                (F_USER_ID, userId)
                ("Reports", reportsRow)
                ("SuspiciousReports", suspiciousReports)
                ("TotalReports", evaluations.size())
                ("SuspicousHosts", hostsRow)
                ("SuspicousIPs", ipsRow)
                ("SuspicousOwners", ownersRow)
                (F_REQUESTS, requests)
                (F_TIME_MS, timeMs)
                (F_MAGNITUDE, magnitude)
                ("RPS", maxRPS)
            );
        }
    }

public:
    // Statistics of every input report table, keyed by input table index.
    THashMap<ui32, TReportTableConfig> TableConfig;
};

REGISTER_REDUCER(TSuggestUidBanReducer)

/// Reducer that collects all report rows of a single owner, scores every report against
/// the statistics of the table it came from and, if at least one report looks suspicious,
/// emits one ban-suggestion row for that owner.
///
/// Every input row must carry a "TableNo" column whose value is a key of TableConfig;
/// an unknown table index aborts the job with yexception.
struct TSuggestOwnerBanReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    Y_SAVELOAD_JOB(TableConfig)

public:
    TSuggestOwnerBanReducer() = default;
    TSuggestOwnerBanReducer(const THashMap<ui32, TReportTableConfig> &tableConfig)
        : TableConfig(tableConfig)
    {
    }

    /// Processes all rows of one owner (the reduce key) and writes at most one output row.
    void Do(TReader *input, TWriter *output) override {
        /// Evaluation of a single report row.
        struct TRecord {
            /// Fills the record from `row` and scores it against the per-table
            /// mean / standard deviation stored in `config`.
            void Set(const NYT::TNode &row, const TReportTableConfig &config) {
                const size_t STREAM_5M_SECONDS = 300;
                const size_t hostsCount = row[F_HOSTS][F_COUNT].AsUint64();
                const size_t ipsCount = row[F_IPS][F_COUNT].AsUint64();
                const size_t uidsCount = row[F_UIDS][F_COUNT].AsUint64();

                Requests = row[F_REQUESTS].AsUint64();
                TimeMs = row[F_TIME_MS].AsUint64();
                RPS = static_cast<double>(Requests) / static_cast<double>(STREAM_5M_SECONDS);

                // Distance above the table mean, measured in standard deviations.
                // Values at or below the mean keep a distance of zero.
                double requestsSigmaDistance = 0;
                double timesSigmaDistance = 0;

                if (Requests > config.MeanRequests) {
                    requestsSigmaDistance = (Requests - config.MeanRequests) / config.StdDevRequests;
                }

                if (TimeMs > config.MeanTimes) {
                    timesSigmaDistance = (TimeMs - config.MeanTimes) / config.StdDevTimes;
                }

                // The nested maps are keyed by the object itself (host, uid, ip).
                if (!NYTUtils::IsNodeNull(row[F_HOSTS][F_HOSTS])) {
                    for (const auto &obj : row[F_HOSTS][F_HOSTS].AsMap()) {
                        Hosts.insert(obj.first);
                    }
                }

                if (!NYTUtils::IsNodeNull(row[F_UIDS][F_UIDS])) {
                    for (const auto &obj : row[F_UIDS][F_UIDS].AsMap()) {
                        // Only keys of the form "uid_<number>" are collected; others are skipped.
                        TStringBuf buf(obj.first);
                        if (buf.SkipPrefix("uid_")) {
                            UIDs.insert(FromString<size_t>(buf));
                        }
                    }
                }

                if (!NYTUtils::IsNodeNull(row[F_IPS][F_IPS])) {
                    for (const auto &obj : row[F_IPS][F_IPS].AsMap()) {
                        IPs.insert(obj.first);
                    }
                }

                SuspiciousSpamness30p = row[F_SPAMNESS]["p30"].AsDouble() > 0.5;
                SuspiciousRequests = requestsSigmaDistance > 3.0;
                SuspiciousRPS = RPS >= 1.0;
                SuspiciousTimes = timesSigmaDistance > 3.0;
                SuspiciousIPs = ipsCount > 3;

                // Load is abnormal when either the request count or the total time is an outlier.
                // Many hosts / uids only count as suspicious together with abnormal load.
                const bool suspiciousLoad = SuspiciousRequests || SuspiciousTimes;
                SuspiciousHosts = hostsCount > 3 && suspiciousLoad;
                SuspiciousUIDs = uidsCount > 3 && suspiciousLoad;

                // Magnitude is the number of triggered signals.
                Magnitude += int(SuspiciousRequests);
                Magnitude += int(SuspiciousTimes);
                Magnitude += int(SuspiciousHosts);
                Magnitude += int(SuspiciousIPs);
                Magnitude += int(SuspiciousUIDs);
                Magnitude += int(SuspiciousSpamness30p);
                Magnitude += int(SuspiciousRPS);

                // Spamness alone is not enough to flag a report.
                if (Magnitude == 1 && SuspiciousSpamness30p) {
                    Magnitude = 0;
                    SuspiciousSpamness30p = false;
                }
            }

            /// True when at least one signal (other than spamness alone) fired.
            bool GetSuspicious() const {
                return Magnitude > 0;
            }

        public:
            size_t Magnitude = 0;
            bool SuspiciousRequests = false;
            bool SuspiciousTimes = false;
            bool SuspiciousHosts = false;
            bool SuspiciousIPs = false;
            bool SuspiciousUIDs = false;
            bool SuspiciousSpamness30p = false;
            bool SuspiciousRPS = false;
            THashSet<TString> IPs;
            THashSet<TString> Hosts;
            THashSet<size_t> UIDs;
            size_t Requests = 0;
            size_t TimeMs = 0;
            double RPS = 0;
        };

        // Evaluations are keyed by the timestamp of the report table the row came from.
        THashMap<time_t, TRecord> evaluations;
        bool suspicious = false;
        size_t requests = 0;
        size_t timeMs = 0;
        double maxRPS = 0.0;
        size_t magnitude = 0;
        const TString owner = input->GetRow()[F_OWNER].AsString();

        for (; input->IsValid(); input->Next()) {
            // The row is only used within this iteration, so a reference avoids copying the node.
            const NYT::TNode &row = input->GetRow();
            const ui32 tableNo = row["TableNo"].AsUint64();

            const auto configIt = TableConfig.find(tableNo);
            if (configIt == TableConfig.end()) {
                ythrow yexception() << "unknown table index " << tableNo;
            }

            const TReportTableConfig &config = configIt->second;
            TRecord &evaluation = evaluations[config.Timestamp];
            evaluation.Set(row, config);
            suspicious |= evaluation.GetSuspicious();
            requests += evaluation.Requests;
            timeMs += evaluation.TimeMs;
            maxRPS = std::max(maxRPS, evaluation.RPS);
        }

        // Owners without a single suspicious report produce no output.
        if (suspicious) {
            // Ordered sets give a deterministic order of the emitted lists.
            TSet<TString> hosts;
            TSet<size_t> uids;
            TSet<TString> ips;

            NYT::TNode hostsRow = NYT::TNode::CreateList();
            NYT::TNode uidsRow = NYT::TNode::CreateList();
            NYT::TNode ipsRow = NYT::TNode::CreateList();
            NYT::TNode reportsRow = NYT::TNode::CreateList();

            size_t suspiciousReports = 0;
            for (const auto &obj : evaluations) {
                const time_t timestamp = obj.first;
                const TRecord &evaluation = obj.second;

                if (evaluation.GetSuspicious()) {
                    // Only objects seen in suspicious reports are listed in the suggestion.
                    hosts.insert(evaluation.Hosts.begin(), evaluation.Hosts.end());
                    ips.insert(evaluation.IPs.begin(), evaluation.IPs.end());
                    uids.insert(evaluation.UIDs.begin(), evaluation.UIDs.end());
                    reportsRow.Add(NYT::TNode()
                        (F_TIMESTAMP, timestamp)
                        (F_MAGNITUDE, evaluation.Magnitude)
                        ("Suspicious", NYT::TNode()
                            ("RequestsGt3S", evaluation.SuspiciousRequests)
                            ("TimeMsGt3S", evaluation.SuspiciousTimes)
                            ("HostsGt3", evaluation.SuspiciousHosts)
                            ("IPsGt3", evaluation.SuspiciousIPs)
                            ("UIDsGt3", evaluation.SuspiciousUIDs)
                            ("Spamness30p", evaluation.SuspiciousSpamness30p)
                            ("RPSGt1", evaluation.SuspiciousRPS)
                            (F_REQUESTS, evaluation.Requests)
                            (F_TIME_MS, evaluation.TimeMs)
                            ("RPS", evaluation.RPS)
                        )
                    );
                    suspiciousReports++;
                    magnitude += evaluation.Magnitude;
                } else {
                    // Clean reports are still listed, with an entity in place of the details.
                    reportsRow.Add(NYT::TNode()
                        (F_TIMESTAMP, timestamp)
                        (F_MAGNITUDE, evaluation.Magnitude)
                        ("Suspicious", NYT::TNode::CreateEntity())
                    );
                }
            }

            for (const TString &obj : hosts) {
                hostsRow.Add(obj);
            }

            for (const TString &obj : ips) {
                ipsRow.Add(obj);
            }

            for (const size_t &obj : uids) {
                uidsRow.Add(obj);
            }

            // Column names (including their spelling) are part of the output table format.
            output->AddRow(NYT::TNode()
                (F_OWNER, owner)
                ("Reports", reportsRow)
                ("SuspiciousReports", suspiciousReports)
                ("TotalReports", evaluations.size())
                ("SuspicousHosts", hostsRow)
                ("SuspicousIPs", ipsRow)
                ("SuspicousUIDs", uidsRow)
                (F_REQUESTS, requests)
                (F_TIME_MS, timeMs)
                (F_MAGNITUDE, magnitude)
                ("RPS", maxRPS)
            );
        }
    }

public:
    /// Per-table statistics, keyed by the table index found in the "TableNo" column.
    THashMap<ui32, TReportTableConfig> TableConfig;
};

REGISTER_REDUCER(TSuggestOwnerBanReducer)

/// Drops all but `keepTables` tables found under `prefix`.
/// Tables are ordered by name, descending; the first `keepTables` of that order survive.
void CleanupTables(NYT::IClientBasePtr client, const TString &prefix, size_t keepTables = 14) {
    TDeque<NYTUtils::TTableInfo> found;
    NYTUtils::GetTableList(client, prefix, found, Max<size_t>());

    const auto byNameDescending = [](const NYTUtils::TTableInfo &a, const NYTUtils::TTableInfo &b) -> bool {
        return a.Name > b.Name;
    };
    std::sort(found.begin(), found.end(), byNameDescending);

    // Everything past the first keepTables entries is surplus.
    size_t position = 0;
    for (const NYTUtils::TTableInfo &table : found) {
        if (position >= keepTables) {
            TOpRunner(client).Drop(table.Name);
        }
        ++position;
    }
}

/// Runs TExtractLogsMapper over `inputTable` and leaves a table sorted by Timestamp
/// at `parsedOutput`. Does nothing (apart from logging) when `parsedOutput` already exists.
///
/// `foreignOutput` and `notParsedOutput` receive the two additional mapper outputs, but both
/// tables are dropped right after the map step, so only `parsedOutput` survives the commit.
void ParseLogs(NYT::IClientBasePtr client, const TString &inputTable, const TString &parsedOutput, const TString &foreignOutput, const TString &notParsedOutput) {
    if (client->Exists(parsedOutput)) {
        LOG_INFO("skipping %s", parsedOutput.data());
        return;
    }

    // All steps run inside one transaction.
    NYT::ITransactionPtr tx = client->StartTransaction();
    TOpRunner(tx)
        .InputNode(inputTable)
        .OutputNode(NYT::TRichYPath(parsedOutput))
        .OutputNode(NYT::TRichYPath(foreignOutput))
        .OutputNode(NYT::TRichYPath(notParsedOutput))
        .Map(new TExtractLogsMapper)
        .Drop(foreignOutput)    //debug
        .Drop(notParsedOutput)  //debug
        .SortBy(F_TIMESTAMP)
        .Sort(parsedOutput)
    ;
    tx->Commit();
}

/// Non-strict schema of the per-user report table: UserId, Requests and TimeMs, all uint64.
NYT::TTableSchema GetUidReportSchema() {
    NYT::TTableSchema result;
    result.Strict(false);
    result.AddColumn(NYT::TColumnSchema().Name(F_USER_ID).Type(NYT::VT_UINT64));
    result.AddColumn(NYT::TColumnSchema().Name(F_REQUESTS).Type(NYT::VT_UINT64));
    result.AddColumn(NYT::TColumnSchema().Name(F_TIME_MS).Type(NYT::VT_UINT64));
    return result;
}

/// Non-strict schema of the per-owner report table: Owner (string), Requests and TimeMs (uint64).
NYT::TTableSchema GetOwnerReportSchema() {
    NYT::TTableSchema result;
    result.Strict(false);
    result.AddColumn(NYT::TColumnSchema().Name(F_OWNER).Type(NYT::VT_STRING));
    result.AddColumn(NYT::TColumnSchema().Name(F_REQUESTS).Type(NYT::VT_UINT64));
    result.AddColumn(NYT::TColumnSchema().Name(F_TIME_MS).Type(NYT::VT_UINT64));
    return result;
}

/// Builds the per-user report table `report` from `parsedLogs` unless it already exists.
/// The table is produced by a MapReduce keyed by UserId and then sorted by UserId,
/// all within one transaction.
void BuildUidReport(NYT::IClientBasePtr client, const TString &parsedLogs, const TString &report) {
    const bool alreadyBuilt = client->Exists(report);
    if (alreadyBuilt) {
        LOG_INFO("skipping %s", report.data());
        return;
    }

    NYT::ITransactionPtr tx = client->StartTransaction();
    TOpRunner(tx)
        .InputNode(parsedLogs)
        .OutputNode(NYT::TRichYPath(report).Schema(GetUidReportSchema()))
        .ReduceBy(F_USER_ID)
        .PartitionCount(100)
        .MapReduce(new TExtractUserIdMapper, new TExtractUserIdReducer)
        .SortBy(F_USER_ID)
        .Sort(report)
    ;
    tx->Commit();
}

/// Builds the per-owner report table `report` from `parsedLogs` unless it already exists.
/// The table is produced by a MapReduce keyed by Owner and then sorted by Owner,
/// all within one transaction.
void BuildOwnerReport(NYT::IClientBasePtr client, const TString &parsedLogs, const TString &report) {
    const bool alreadyBuilt = client->Exists(report);
    if (alreadyBuilt) {
        LOG_INFO("skipping %s", report.data());
        return;
    }

    NYT::ITransactionPtr tx = client->StartTransaction();
    TOpRunner(tx)
        .InputNode(parsedLogs)
        .OutputNode(NYT::TRichYPath(report).Schema(GetOwnerReportSchema()))
        .ReduceBy(F_OWNER)
        .PartitionCount(100)
        .MapReduce(new TExtractOwnerMapper, new TExtractOwnerReducer)
        .SortBy(F_OWNER)
        .Sort(report)
    ;
    tx->Commit();
}

/// Computes aggregate statistics of `reportTable` and stores them in its "Metrics" attribute.
///
/// Skipped when the attribute is already present. Otherwise a MapReduce writes the statistics
/// into the temporary table `<reportTable>.METRICS`; every row of that table is copied into
/// the attribute (keys MeanRequests, MeanTimes, MedianRequests, MedianTimes, StdDevRequests,
/// StdDevTimes), and the temporary table is dropped before the transaction is committed.
void GetReportMetrics(NYT::IClientBasePtr client, const TString &reportTable) {
    if (NYTUtils::HasAttr(client, reportTable, "Metrics")) {
        LOG_INFO("skipping metrics %s", reportTable.data());
        return;
    }

    const TString metricsTable = reportTable + ".METRICS";

    NYT::ITransactionPtr tx = client->StartTransaction();
    TOpRunner(tx)
        .InputNode(reportTable)
        .OutputNode(metricsTable)
        .ReduceBy("EmptyKey")
        .MapReduce(new TEvalReportMetricsMapper, new TEvalReportMetricsReducer)
    ;

    auto reader = tx->CreateTableReader<NYT::TNode>(metricsTable);
    for (; reader->IsValid(); reader->Next()) {
        const NYT::TNode &row = reader->GetRow();
        const double meanRequests     = row["MeanRequests"].AsDouble();
        const double meanTimes        = row["MeanTimes"].AsDouble();
        const double medianRequests   = row["MedianRequests"].AsDouble();
        const double medianTimes      = row["MedianTimes"].AsDouble();
        const double stdDevRequests   = row["StdDevRequests"].AsDouble();
        const double stdDevTimes      = row["StdDevTimes"].AsDouble();

        // The parent map has to exist before its children can be set.
        NYTUtils::SetAttr(tx, reportTable, "Metrics", NYT::TNode::CreateMap());
        NYTUtils::SetAttr(tx, reportTable, "Metrics/MeanRequests", meanRequests);
        NYTUtils::SetAttr(tx, reportTable, "Metrics/MeanTimes", meanTimes);
        NYTUtils::SetAttr(tx, reportTable, "Metrics/MedianRequests", medianRequests);
        NYTUtils::SetAttr(tx, reportTable, "Metrics/MedianTimes", medianTimes);
        NYTUtils::SetAttr(tx, reportTable, "Metrics/StdDevRequests", stdDevRequests);
        NYTUtils::SetAttr(tx, reportTable, "Metrics/StdDevTimes", stdDevTimes);
    }

    TOpRunner(tx).Drop(metricsTable);
    tx->Commit();
}

/// Assembles the TReportTableConfig of a report table: the timestamp is parsed from the
/// table name (ISO 8601), the statistics are read from the table's "Metrics" attribute.
TReportTableConfig GetReportTableConfig(NYT::IClientBasePtr client, const TString &tableName) {
    const TString shortName = NYTUtils::GetTableName(tableName);
    const time_t tableTime = TInstant::ParseIso8601Deprecated(shortName).TimeT();

    const NYT::TNode stats = NYTUtils::GetAttr(client, tableName, "Metrics");
    // Reads one floating point value of the attribute map.
    const auto stat = [&stats](const char *key) -> double {
        return stats[key].AsDouble();
    };

    const double requestsMean   = stat("MeanRequests");
    const double timesMean      = stat("MeanTimes");
    const double requestsMedian = stat("MedianRequests");
    const double timesMedian    = stat("MedianTimes");
    const double requestsStdDev = stat("StdDevRequests");
    const double timesStdDev    = stat("StdDevTimes");

    return TReportTableConfig(tableTime, requestsMean, timesMean, requestsMedian, timesMedian, requestsStdDev, timesStdDev);
}

/// Builds the ban-suggestion table `suggestTable` from the report tables under
/// `reportTablesPrefix`.
///
/// Only the first 12 tables in descending name order are used. Each of them becomes an input
/// of one MapReduce (TSuggestBanMapper + TReducer, reduced by `reduceBy`); the reducer is
/// constructed with the per-table configs keyed by the table's position in the input list.
/// The result is sorted by Magnitude. Everything happens in a single transaction.
template<class TReducer>
void BuildBanSuggest(NYT::IClientBasePtr client, const TString &reportTablesPrefix, const TString &reduceBy, const TString &suggestTable) {
    const size_t MAX_REPORTS = 12; // 1 hour

    TDeque<NYTUtils::TTableInfo> reports;
    NYTUtils::GetTableList(client, reportTablesPrefix, reports);

    const auto byNameDescending = [](const NYTUtils::TTableInfo &a, const NYTUtils::TTableInfo &b) -> bool {
        return a.Name > b.Name;
    };
    std::sort(reports.begin(), reports.end(), byNameDescending);

    if (reports.size() > MAX_REPORTS) {
        reports.resize(MAX_REPORTS);
    }

    NYT::ITransactionPtr tx = client->StartTransaction();
    TOpRunner runner(tx);

    // Input order defines the table index that keys the config map.
    THashMap<ui32, TReportTableConfig> configByTableNo;
    for (size_t idx = 0; idx < reports.size(); ++idx) {
        const auto &reportName = reports[idx].Name;
        runner.InputNode(reportName);
        configByTableNo[idx] = GetReportTableConfig(tx, reportName);
    }

    runner
        .OutputNode(suggestTable)
        .ReduceBy(reduceBy)
        .MapReduce(new TSuggestBanMapper, new TReducer(configByTableNo))
        .SortBy(F_MAGNITUDE)
        .Sort(suggestTable)
    ;
    tx->Commit();
}

/// Ban suggestions per user: BuildBanSuggest with TSuggestUidBanReducer, reduced by UserId.
void BuildUidBanSuggest(NYT::IClientBasePtr client, const TString &reportTablesPrefix, const TString &suggestTable) {
    const TString reduceKey = F_USER_ID;
    BuildBanSuggest<TSuggestUidBanReducer>(client, reportTablesPrefix, reduceKey, suggestTable);
}

/// Ban suggestions per owner: BuildBanSuggest with TSuggestOwnerBanReducer, reduced by Owner.
void BuildOwnerBanSuggest(NYT::IClientBasePtr client, const TString &reportTablesPrefix, const TString &suggestTable) {
    const TString reduceKey = F_OWNER;
    BuildBanSuggest<TSuggestOwnerBanReducer>(client, reportTablesPrefix, reduceKey, suggestTable);
}

/// Daily pipeline: trims old derived tables, then for each of the first 14 daily source
/// logs (descending name order) parses the log, builds owner and uid reports and computes
/// their metrics. Source tables are handled concurrently on a pool of 4 threads; a failure
/// of one table is logged and does not affect the others.
void ProcessDailyLogs(NYT::IClientBasePtr client, const TConfig &config) {
    const size_t MAX_SOURCE_TABLES = 14;
    const size_t WORKERS = 4;

    CleanupTables(client, config.TABLE_SPAM_DEFENCE_1D_PARSED);
    CleanupTables(client, config.TABLE_SPAM_DEFENCE_1D_NOT_PARSED);
    CleanupTables(client, config.TABLE_SPAM_DEFENCE_1D_FOREIGN);

    TDeque<NYTUtils::TTableInfo> sourceLogs;
    NYTUtils::GetTableList(client, config.TABLE_SOURCE_FRONT_LOGS_DAILY_ROOT, sourceLogs, Max<size_t>());

    const auto byNameDescending = [](const NYTUtils::TTableInfo &a, const NYTUtils::TTableInfo &b) -> bool {
        return a.Name > b.Name;
    };
    std::sort(sourceLogs.begin(), sourceLogs.end(), byNameDescending);

    if (sourceLogs.size() > MAX_SOURCE_TABLES) {
        sourceLogs.resize(MAX_SOURCE_TABLES);
    }

    THolder<IThreadPool> pool(CreateThreadPool(WORKERS));
    for (const NYTUtils::TTableInfo &sourceLog : sourceLogs) {
        // The task owns copies of everything it needs except the client.
        pool->SafeAddFunc([=, &client]() {
            try {
                const TString day = NYTUtils::GetTableName(sourceLog.Name);

                // Step 1: parse the raw log.
                const TString parsedTable    = NYTUtils::JoinPath(config.TABLE_SPAM_DEFENCE_1D_PARSED, day);
                const TString foreignTable   = NYTUtils::JoinPath(config.TABLE_SPAM_DEFENCE_1D_FOREIGN, day);
                const TString notParsedTable = NYTUtils::JoinPath(config.TABLE_SPAM_DEFENCE_1D_NOT_PARSED, day);
                ParseLogs(client, sourceLog.Name, parsedTable, foreignTable, notParsedTable);

                // Step 2: reports, then their metrics.
                const TString ownerReport = NYTUtils::JoinPath(config.TABLE_SPAM_DEFENCE_1D_REPORT_OWNER, day);
                const TString uidReport   = NYTUtils::JoinPath(config.TABLE_SPAM_DEFENCE_1D_REPORT_UID, day);
                BuildOwnerReport(client, parsedTable, ownerReport);
                BuildUidReport(client, parsedTable, uidReport);
                GetReportMetrics(client, ownerReport);
                GetReportMetrics(client, uidReport);
            } catch (yexception &e) {
                LOG_ERROR("unable to complete process daily logs task: %s", e.what());
            }
        });
    }

    // Blocks until all queued tasks are done.
    pool->Stop();
}

/// 5-minute stream pipeline.
///
/// 1. Compares the names of the source stream logs with the names of already parsed logs.
/// 2. Drops derived tables (parsed / not parsed / foreign / reports) whose source log is gone.
/// 3. For every source log that has no parsed counterpart: parses it, then builds uid and
///    owner reports with metrics. Logs are processed on a pool of 16 threads.
/// 4. If anything new was processed, rebuilds the uid and owner ban-suggestion tables.
///
/// Failures of individual steps are logged and do not stop the remaining work.
void ProcessStream5mLogs(NYT::IClientBasePtr client, const TConfig &config) {
    // Collects the short names of all tables located under `root`.
    const auto listNames = [&client](const auto &root) {
        TDeque<NYTUtils::TTableInfo> listing;
        NYTUtils::GetTableList(client, root, listing, Max<size_t>());
        TSet<TString> names;
        for (const NYTUtils::TTableInfo &entry : listing) {
            names.insert(NYTUtils::GetTableName(entry.Name));
        }
        return names;
    };

    const TSet<TString> sourceNames = listNames(config.TABLE_SOURCE_FRONT_LOGS_STREAM5M_ROOT);
    const TSet<TString> parsedNames = listNames(config.TABLE_SPAM_DEFENCE_5M_PARSED);

    TDeque<TString> staleNames;   // parsed, but the source log no longer exists
    TDeque<TString> pendingNames; // source log exists, but has not been parsed yet

    std::set_difference(parsedNames.begin(), parsedNames.end(), sourceNames.begin(), sourceNames.end(), std::back_inserter(staleNames));
    std::set_difference(sourceNames.begin(), sourceNames.end(), parsedNames.begin(), parsedNames.end(), std::back_inserter(pendingNames));

    for (const TString &stale : staleNames) {
        const auto dropUnder = [&client, &stale](const auto &root) {
            TOpRunner(client).Drop(NYTUtils::JoinPath(root, stale));
        };
        dropUnder(config.TABLE_SPAM_DEFENCE_5M_PARSED);
        dropUnder(config.TABLE_SPAM_DEFENCE_5M_NOT_PARSED);
        dropUnder(config.TABLE_SPAM_DEFENCE_5M_FOREIGN);
        dropUnder(config.TABLE_SPAM_DEFENCE_5M_REPORT_OWNER);
        dropUnder(config.TABLE_SPAM_DEFENCE_5M_REPORT_UID);
    }

    THolder<IThreadPool> pool(CreateThreadPool(16));
    for (const TString &dateStr : pendingNames) {
        // The task owns copies of everything it needs except the client.
        pool->SafeAddFunc([=, &client]() {
            try {
                const TString rawLog         = NYTUtils::JoinPath(config.TABLE_SOURCE_FRONT_LOGS_STREAM5M_ROOT, dateStr);
                const TString parsedTable    = NYTUtils::JoinPath(config.TABLE_SPAM_DEFENCE_5M_PARSED, dateStr);
                const TString foreignTable   = NYTUtils::JoinPath(config.TABLE_SPAM_DEFENCE_5M_FOREIGN, dateStr);
                const TString notParsedTable = NYTUtils::JoinPath(config.TABLE_SPAM_DEFENCE_5M_NOT_PARSED, dateStr);
                ParseLogs(client, rawLog, parsedTable, foreignTable, notParsedTable);

                const TString ownerReport = NYTUtils::JoinPath(config.TABLE_SPAM_DEFENCE_5M_REPORT_OWNER, dateStr);
                const TString uidReport   = NYTUtils::JoinPath(config.TABLE_SPAM_DEFENCE_5M_REPORT_UID, dateStr);

                // Both reports are built under one shared transaction.
                NYT::ITransactionPtr tx = client->StartTransaction();

                auto buildUid = [&] {
                    try {
                        BuildUidReport(tx, parsedTable, uidReport);
                        GetReportMetrics(tx, uidReport);
                    } catch (yexception &e) {
                        LOG_ERROR("unable to process uid stream 5m report: %s", e.what());
                    }
                };

                auto buildOwner = [&] {
                    try {
                        BuildOwnerReport(tx, parsedTable, ownerReport);
                        GetReportMetrics(tx, ownerReport);
                    } catch (yexception &e) {
                        LOG_ERROR("unable to process owner stream 5m report: %s", e.what());
                    }
                };

                NUtils::RunAsync(buildUid, buildOwner);

                tx->Commit();
            } catch (yexception &e) {
                LOG_ERROR("unable to complete process stream 5m logs task: %s", e.what());
            }
        });
    }
    pool->Stop();

    // Suggestions only need a rebuild when new reports have appeared.
    if (pendingNames.empty()) {
        return;
    }

    auto suggestUid = [&] {
        try {
            BuildUidBanSuggest(client, config.TABLE_SPAM_DEFENCE_5M_REPORT_UID, config.TABLE_SPAM_DEFENCE_BAN_SUGGEST_UID);
        } catch (yexception &e) {
            LOG_ERROR("unable to process uid ban suggest: %s", e.what());
        }
    };

    auto suggestOwner = [&] {
        try {
            BuildOwnerBanSuggest(client, config.TABLE_SPAM_DEFENCE_5M_REPORT_OWNER, config.TABLE_SPAM_DEFENCE_BAN_SUGGEST_OWNER);
        } catch (yexception &e) {
            LOG_ERROR("unable to process owner ban suggest: %s", e.what());
        }
    };

    NUtils::RunAsync(suggestUid, suggestOwner);
}

/// Calls NYTUtils::CreatePath for every output directory of the daily (1D) and
/// 5-minute (5M) pipelines.
void CreatePaths(NYT::IClientBasePtr client, const TConfig &config) {
    const auto ensure = [&client](const auto &path) {
        NYTUtils::CreatePath(client, path);
    };

    // Daily pipeline.
    ensure(config.TABLE_SPAM_DEFENCE_1D_PARSED);
    ensure(config.TABLE_SPAM_DEFENCE_1D_NOT_PARSED);
    ensure(config.TABLE_SPAM_DEFENCE_1D_FOREIGN);
    ensure(config.TABLE_SPAM_DEFENCE_1D_REPORT_UID);
    ensure(config.TABLE_SPAM_DEFENCE_1D_REPORT_OWNER);

    // 5-minute stream pipeline.
    ensure(config.TABLE_SPAM_DEFENCE_5M_PARSED);
    ensure(config.TABLE_SPAM_DEFENCE_5M_NOT_PARSED);
    ensure(config.TABLE_SPAM_DEFENCE_5M_FOREIGN);
    ensure(config.TABLE_SPAM_DEFENCE_5M_REPORT_UID);
    ensure(config.TABLE_SPAM_DEFENCE_5M_REPORT_OWNER);
}

int Main(const TConfig &config) {
    //TFrontLogRecord record("[pid:60] Resolved request <user/beta> [in undefined~18 ms] for webmaster3-viewer.search.yandex.net:33585/temporary/user/beta/status.json?userIp=158.69.83.25&frontendIp=2a02%3A6b8%3Ac02%3Ad%3A0%3A40b4%3A45aa%3Af910&balancerRequestId=1506888598254934-6826864468279504779&balancerParentRequestId=1477368081757965-15433181987903729352&yandexUid=58748571506888396&userId=550419318&hostId=http%3At5n.xhanster.gq%3A80");
    //Cout << record.Host << Endl;
    //Cout << record.TimeMs << Endl;

    NYT::IClientPtr client = NYT::CreateClient(config.MR_SERVER_HOST_LOGS);

    CreatePaths(client, config);

    auto taskDaily = [&] {
        try {
            ProcessDailyLogs(client, config);
        } catch(yexception &e) {
            LOG_ERROR("unable to process daily logs: %s", e.what());
        }
    };

    auto taskStream5m = [&] {
        try {
            ProcessStream5mLogs(client, config);
        } catch(yexception &e) {
            LOG_ERROR("unable to process stream 5m logs: %s", e.what());
        }
    };

    NUtils::RunAsync(taskDaily, taskStream5m);

    return 0;
}

/// Forwards a message to the application log at INFO level.
/// The text is passed as an argument, never as the format string.
static void LogInfo(const TString &msg) {
    const char *text = msg.data();
    LOG_INFO("%s", text);
}

} //namespace NWebmaster

/// Process entry point: initializes YT and command line arguments, loads the configuration
/// and runs NWebmaster::Main.
///
/// Returns the value of Main on success and 1 when an exception escapes configuration
/// loading or Main, so that a critical failure is visible in the exit status.
int main(int argc, const char **argv) {
    using namespace NWebmaster;
    NYT::Initialize(argc, argv);
    TArgs::Init(argc, argv);
    TArgs::ParseOpts();

    NYTUtils::DisableLogger();

    int res = 0;
    LOG_INFO("Started");

    try {
        // Route TOpRunner messages into the application log.
        TOpRunner::LogInfo = LogInfo;
        TConfig::Instance().Load();
        res = Main(TConfig::Instance());
    } catch (const std::exception &e) {
        LOG_CRIT("%s", e.what());
        // A critical failure must not be reported as a successful run.
        res = 1;
    }

    LOG_INFO("Finished");
    return res;
}
