#include <util/datetime/cputimer.h>
#include <util/draft/datetime.h>
#include <util/generic/vector.h>
#include <util/generic/deque.h>
#include <util/generic/hash_set.h>
#include <util/generic/size_literals.h>

#include <util/string/reverse.h>
#include <library/cpp/containers/comptrie/comptrie.h>
#include <library/cpp/containers/comptrie/prefix_iterator.h>
#include <library/cpp/robots_txt/robots_txt.h>
#include <library/cpp/robots_txt/robotstxtcfg.h>
#include <library/cpp/string_utils/url/url.h>
#include <library/cpp/uri/common.h>

#include <mapreduce/yt/interface/client.h>

#include <robot/jupiter/protos/acceptance.pb.h>
#include <robot/jupiter/protos/export.pb.h>
#include <robot/library/yt/static/command.h>
#include <robot/library/yt/static/tags.h>

#include <wmconsole/version3/library/jupiter/jupiter.h>
#include <wmconsole/version3/processors/user_sessions/exports/catalogia/protos/catalogia.pb.h>
#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/url.h>
#include <wmconsole/version3/wmcutil/yt/triggers.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include "config.h"
#include "utils.h"

#include "task_robots.h"

namespace NWebmaster {
namespace NCatalogia {

using namespace NJupiter;

// Typed table tags shared by the reducers and by TaskRobots() in this file.
// Each tag couples a protobuf row type with a numeric id; TaskRobots() passes
// the tag when attaching a table to a command, and the reducers pass the same
// tag to the reader/writer to address rows of that table.
// NOTE(review): ids are presumably required to be unique within an operation -
// confirm against robot/library/yt/static/tags.h.

// Host spread tables (one input per table returned by LoadHostSpreadTables).
static const TInputTag<NJupiter::THostdatForWebmaster> SpreadHostdatInputTag        (1);
// Jupiter acceptance host table.
static const TInputTag<NJupiter::TAcceptanceHostRecord> AcceptanceHostdatInputTag   (2);
// Filtered catalogia hosts table (cfg.TABLE_CATALOGIA_SOURCE_HOSTS_FLT).
static const TInputTag<NProto::TCatalogiaHost> HostInputTag                         (3);
// Prepared robots table (cfg.TABLE_CATALOGIA_SOURCE_ROBOTS) read by the filter stage.
static const TInputTag<NProto::TCatalogiaRobots> RobotsInputTag                     (4);

// Output of both reducers: TCatalogiaRobots rows.
static const TOutputTag<NProto::TCatalogiaRobots> RobotsOutputTag                   (5);

struct TCatalogiaPrepareRobotsReducer : public TTaggedReducer {
    TCatalogiaPrepareRobotsReducer() = default;
    TCatalogiaPrepareRobotsReducer(const TVector<char> &trieStream)
        : TrieStream(trieStream)
    {
    }

    void Save(IOutputStream& stream) const override {
        ::Save(&stream, TrieStream);
        TTaggedReducer::Save(stream);
    }

    void Load(IInputStream& stream) override {
        ::Load(&stream, TrieStream);
        TTaggedReducer::Load(stream);
    }

    void StartTagged(TTagedWriter /*writer*/) override {
        Trie.Init(&TrieStream[0], TrieStream.size());
    }

    void DoTagged(TTagedReader reader, TTagedWriter writer) final {
        TMaybe<NJupiter::THostdatForWebmaster> mbSpread = reader.GetRowMaybe(SpreadHostdatInputTag);
        TMaybe<NJupiter::TAcceptanceHostRecord> mbHostdat = reader.GetRowMaybe(AcceptanceHostdatInputTag);

        TString host;
        if (mbSpread.Defined()) {
            host = mbSpread.GetRef().GetHost();
        } else if (mbHostdat.Defined()) {
            host = mbHostdat.GetRef().GetHost();
        } else {
            ythrow yexception() << "Something went wrong";
        }

        TString rhost = host;
        ReverseInPlace(rhost);

        bool found = false;
        for (auto it = MakePrefixIterator(Trie, rhost.data(), rhost.size()); it; ++it) {
            const TString owner = host.substr(host.size() - it.GetPrefixLen());

            if (NUtils::IsSubdomain(host, owner)) {
                found = true;
                break;
            }
        }

        if (!found) {
            return;
        }

        TMap<time_t, TString> hostInfoRecords;
        for (const auto &row : reader.GetRows(SpreadHostdatInputTag)) {
            if (!row.GetRobots().empty()) {
                hostInfoRecords[row.GetLastAccess()] = row.GetRobots();
            }
        }

        for (const auto &row : reader.GetRows(AcceptanceHostdatInputTag)) {
            if (!row.GetRobots().empty()) {
                hostInfoRecords[row.GetLastAccess()] = row.GetRobots();
            }
        }

        if (!hostInfoRecords.empty()) {
            time_t lastAccess = hostInfoRecords.rbegin()->first;
            TString &robotsRaw = hostInfoRecords.rbegin()->second;

            auto multi = reinterpret_cast<const host_multirobots_t*>(robotsRaw.data());
            if (!multi->IsValid(robotsRaw.size())) {
                return;
            }

            const char* packed = nullptr;
            if (multi->GetRobots(packed)) {
                TRobotsTxt filter;
                filter.LoadPacked(packed);
                auto cleanParams = filter.GetCleanParams();
                if (cleanParams) {
                    TStringBuilder robotsBuilder;
                    robotsBuilder << "host:" << CutHttpPrefix(host, true /*ignoreHttps*/) << Endl;
                    for (const auto& cleanParam : cleanParams) {
                        robotsBuilder << "clean-param: " << cleanParam << Endl;
                    }

                    NProto::TCatalogiaRobots dstMsg;
                    dstMsg.SetHost(host);
                    dstMsg.SetLastAccess(lastAccess);
                    dstMsg.SetRobots(robotsBuilder);
                    writer.AddRow(dstMsg, RobotsOutputTag);
                }
            }
        }
    }

public:
    TVector<char> TrieStream;
    TCompactTrie<char> Trie;
};

REGISTER_REDUCER(TCatalogiaPrepareRobotsReducer)

// Reducer that passes TCatalogiaRobots rows through unchanged, but only for
// reduce keys that also have at least one row in the hosts table
// (HostInputTag). Keys without a host row produce no output.
struct TRobotsFilterReducer : public TTaggedReducer {
    void DoTagged(TTagedReader reader, TTagedWriter writer) override {
        // Only the presence of a host row matters; its content is not used.
        TMaybe<NProto::TCatalogiaHost> filter = reader.GetRowMaybe(HostInputTag);
        reader.SkipRows(HostInputTag);
        if (!reader.IsValid() || !filter.Defined()) {
            return;
        }

        // Bind by const reference: rows are forwarded as-is, so copying each
        // protobuf message is unnecessary.
        for (const auto& row : reader.GetRows(RobotsInputTag)) {
            writer.AddRow(row, RobotsOutputTag);
        }
    }
};

REGISTER_REDUCER(TRobotsFilterReducer)

// Rebuilds the catalogia robots tables from the current Jupiter acceptance
// host table and the host spread tables.
//
// Steps:
//   1. Skip everything if the trigger on TABLE_CATALOGIA_SOURCE_ROBOTS says the
//      table is already built from the current acceptance host table.
//   2. Load the catalogia domains and their trie from the catalogia cluster.
//   3. Inside one transaction on the main cluster:
//        - reduce host data by "Host" into TABLE_CATALOGIA_SOURCE_ROBOTS,
//        - sort that table by "Host",
//        - reduce it together with TABLE_CATALOGIA_SOURCE_HOSTS_FLT into
//          TABLE_CATALOGIA_SOURCE_ROBOTS_FLT,
//        - update the trigger and commit.
//
// Both arguments are unused. Returns 0 both when the update is skipped and
// when it completes.
int TaskRobots(int, const char **) {
    const auto &cfg = TConfig::CInstance();

    NYT::IClientPtr clientMain = NYT::CreateClient(cfg.MR_SERVER_HOST_MAIN);
    NYT::IClientPtr clientCatalogia = NYT::CreateClient(cfg.MR_SERVER_HOST_CATALOGIA);

    NYTUtils::CreatePath(clientMain, cfg.TABLE_CATALOGIA_SOURCE_ROOT);

    const TString hostTable = GetJupiterAcceptanceHostTable(clientMain);
    const time_t hostTableTs = GetJupiterTsTZFromPath(hostTable);

    TYtSourceTrigger robotsTrigger(clientMain, cfg.TABLE_CATALOGIA_SOURCE_ROBOTS);
    if (!robotsTrigger.NeedUpdate(hostTable)) {
        LOG_INFO("user_sessions, robots is already updated");
        return 0;
    }

    // `domains` is used only for the log line below; the reducer works with
    // the serialized trie.
    THashSet<TString> domains;
    TVector<char> domainsTrieStream;
    LoadCatalogiaDomains(clientCatalogia, cfg.TABLE_SOURCE_CATALOGIA_DOMAINS, domains, domainsTrieStream);
    // size() yields size_t, for which %zu is the matching conversion.
    LOG_INFO("user_sessions, robots, loaded %zu domains, trie %zu bytes", domains.size(), domainsTrieStream.size());

    NYT::ITransactionPtr tx = clientMain->StartTransaction();

    // NOTE(review): the reducer is allocated with a raw `new`; ownership is
    // presumably taken by TReduceCmd - confirm against command.h.
    TReduceCmd<TCatalogiaPrepareRobotsReducer> cmd(tx, new TCatalogiaPrepareRobotsReducer(domainsTrieStream));
    TDeque<NYTUtils::TTableInfo> hostSpreadTables;
    LoadHostSpreadTables(clientMain, hostTableTs, hostSpreadTables);
    for (const NYTUtils::TTableInfo &table : hostSpreadTables) {
        cmd.Input(TTable<NJupiter::THostdatForWebmaster>(tx, table.Name), SpreadHostdatInputTag);
        LOG_INFO("robots, input %s", table.Name.c_str());
    }

    LOG_INFO("robots, input %s", hostTable.c_str());
    LOG_INFO("robots, output %s", cfg.TABLE_CATALOGIA_SOURCE_ROBOTS.c_str());
    // Stage 1: spread tables were attached above, the acceptance table is
    // attached last; rows are grouped by "Host".
    cmd
        .Input(TTable<NJupiter::TAcceptanceHostRecord>(tx, hostTable), AcceptanceHostdatInputTag)
        .Output(
            TTable<NProto::TCatalogiaRobots>(tx, cfg.TABLE_CATALOGIA_SOURCE_ROBOTS)
                .AsSortedOutput({"Host"}),
            RobotsOutputTag
        )
        .MemoryLimit(2_GBs)
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .ReduceBy({"Host"})
        .Do()
    ;

    // Stage 2: sort the prepared robots table by "Host" before it is used as
    // a reduce input.
    TSortCmd<NProto::TCatalogiaRobots>(tx, TTable<NProto::TCatalogiaRobots>(tx, cfg.TABLE_CATALOGIA_SOURCE_ROBOTS))
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .By({"Host"})
        .Do()
    ;

    // Stage 3: keep robots rows only for hosts present in the filtered hosts
    // table. The hosts table is attached before the robots table.
    TReduceCmd<TRobotsFilterReducer>(tx)
        .OperationWeight(cfg.OPERATION_WEIGHT)
        .Input(TTable<NProto::TCatalogiaHost>(tx, cfg.TABLE_CATALOGIA_SOURCE_HOSTS_FLT), HostInputTag)
        .Input(TTable<NProto::TCatalogiaRobots>(tx, cfg.TABLE_CATALOGIA_SOURCE_ROBOTS), RobotsInputTag)
        .Output(TTable<NProto::TCatalogiaRobots>(tx, cfg.TABLE_CATALOGIA_SOURCE_ROBOTS_FLT)
            .AsSortedOutput({"Host"}), RobotsOutputTag
        )
        .ReduceBy({"Host"})
        .CpuLimit(0.2)
        .Do()
    ;

    // The trigger is updated inside the same transaction as the tables, so
    // both become visible together on commit.
    robotsTrigger.Update(tx, hostTable);
    tx->Commit();

    return 0;
}

} //namespace NCatalogia
} //namespace NWebmaster
