#include "actor.h"
#include "events.h"
#include "grpc_client.h"

#include <solomon/libs/cpp/actors/util/requests_executor.h>
#include <solomon/libs/cpp/logging/logging.h>

#include <library/cpp/actors/core/event_local.h>

#include <util/generic/scope.h>
#include <util/generic/set.h>
#include <util/random/random.h>
#include <util/system/hostname.h>

#include <functional>

/**
 * Implementation of the SWIM protocol.
 * For the original paper, key notes, algo description, differences from the original algo, etc.
 * see https://nda.ya.ru/t/W69UG5rs3VwhQD
 */
namespace NSolomon::NClusterMembership {
namespace {

using namespace NActors;
using namespace NMonitoring;

using yandex::monitoring::cluster_membership::ClusterMembershipConfig;
using yandex::solomon::config::rpc::TGrpcClientConfig;

// implementation constants
// Upper bound on the number of gossips put into a single batch by GetDissemination()
constexpr ui16 DISSEMINATION_MSGS_LIMIT = 10;

struct TGossipCmp {
    bool operator()(const TGossipPtr& lhs, const TGossipPtr& rhs) const {
        return lhs->PiggybackCnt < rhs->PiggybackCnt;
    }
};

/* State{incarnation_number}:
 *   A -- Alive
 *   S -- Suspect
 *   D -- Dead
 *
 * Order of preference:
 * [A1, A2, S2, D1, A3, S4, A4] => [D1, S4, A4, A3, S2, A2, A1]
 *
 * Returns true iff lhs is strictly more relevant than rhs, so that sorting with this predicate
 * puts the most relevant update first:
 *   - Dead overrides everything, Unknown is overridden by everything;
 *   - otherwise a greater incarnation number wins;
 *   - with equal incarnation numbers Suspicious overrides Alive.
 *
 * Every path returns a value, including a State outside of the four known enumerators
 * (previously control could flow off the end of the function, which is undefined behaviour).
 */
bool CompareUpdates(const TMembershipUpdate& lhs, const TMembershipUpdate& rhs) {
    // Unknown never precedes anything ...
    if (lhs.State == ENodeState::Unknown) {
        return false;
    }

    // ... and anything known precedes Unknown
    if (rhs.State == ENodeState::Unknown) {
        return true;
    }

    // Dead precedes every other known state, two Dead updates are equivalent
    if (lhs.State == ENodeState::Dead) {
        return rhs.State != ENodeState::Dead;
    }

    if (rhs.State == ENodeState::Dead) {
        return false;
    }

    // from here on both sides are either Alive or Suspicious
    if (lhs.State == ENodeState::Suspicious && rhs.State == ENodeState::Alive) {
        // a suspicion overrides an Alive update with the same incarnation number
        return lhs.IncarnationNumber >= rhs.IncarnationNumber;
    }

    return lhs.IncarnationNumber > rhs.IncarnationNumber;
}

// A list of asynchronous ack responses and a shared handle to such a list
using TForwardedAcks = TVector<TAsyncAckResponse>;
using TForwardedAcksPtr = TAtomicSharedPtr<TForwardedAcks>;

/**
 * Events that the membership actor sends to itself (or receives from the requests executor it has registered).
 */
struct TPrivateEvents: private NSolomon::TPrivateEvents {
    enum {
        OnForwardPingCallback = SpaceBegin,
        OnProbe,
        OnPingCallback,
        OnOnePingReqResponse,
        OnPingReqCompleted,
        OnGossips,
        End
    };

    static_assert(End < SpaceEnd, "too many event types");

    // A ping forwarded on behalf of another node (Sender) has finished
    struct TOnForwardPingCallback: TEventLocal<TOnForwardPingCallback, OnForwardPingCallback> {
        TString TargetAddress;
        ui32 LocalIncarnationNumber;
        TAckResponseOrErrorPtr AckOrError;
        TActorId Sender;

        TOnForwardPingCallback(
                TString&& targetAddress,
                ui32 localIncarnationNumber,
                TAckResponseOrErrorPtr response,
                TActorId replyTo)
            : TargetAddress{std::move(targetAddress)}
            , LocalIncarnationNumber{localIncarnationNumber}
            , AckOrError{std::move(response)}
            , Sender{replyTo}
        {
        }
    };

    // Start of the next protocol period
    struct TOnProbe: TEventLocal<TOnProbe, OnProbe> {
    };

    // A direct ping issued from OnProbe() has finished
    struct TOnPingCallback: TEventLocal<TOnPingCallback, OnPingCallback> {
        TString TargetAddress;
        ui32 LocalIncarnationNumber;
        TAckResponseOrErrorPtr AckOrError;

        TOnPingCallback(TString&& targetAddress, ui32 localIncarnationNumber, TAckResponseOrErrorPtr response)
            : TargetAddress{std::move(targetAddress)}
            , LocalIncarnationNumber{localIncarnationNumber}
            , AckOrError{std::move(response)}
        {
        }
    };

    // One response to a ping-req about Target
    struct TOnOnePingReqResponse: TEventLocal<TOnOnePingReqResponse, OnOnePingReqResponse> {
        TAckResponseOrErrorPtr Response;
        TNodeEndpoint Target;

        TOnOnePingReqResponse(TAckResponseOrErrorPtr ackOrError, TNodeEndpoint endpoint)
            : Response{std::move(ackOrError)}
            , Target{std::move(endpoint)}
        {
        }
    };

    // The whole ping-req about Target is over
    struct TOnPingReqCompleted: TEventLocal<TOnPingReqCompleted, OnPingReqCompleted> {
        TNodeEndpoint Target;
        ui32 IncarnationNumber;

        TOnPingReqCompleted(TNodeEndpoint endpoint, ui32 incarnationNumber)
            : Target{std::move(endpoint)}
            , IncarnationNumber{incarnationNumber}
        {
        }
    };

    // Gossips received together with an ack, to be processed by a separate handler
    struct TOnGossips: TEventLocal<TOnGossips, OnGossips> {
        TAtomicSharedPtr<NClusterMembership::TGossips> Gossips;

        explicit TOnGossips(TAtomicSharedPtr<NClusterMembership::TGossips> received)
            : Gossips{std::move(received)}
        {
        }
    };
};

/**
 * Counters of direct pings and of ping-reqs, split by outcome.
 * All metrics carry the cluster name in the "component" label.
 */
class TMetrics {
private:
    struct TNodeMetrics {
        TRate* PingOk;
        TRate* PingFail;
        TRate* ForwardOk;
        TRate* ForwardFail;
    };

public:
    TMetrics(TString clusterName, TMetricRegistry& registry)
        : ClusterName_{std::move(clusterName)}
        , Registry_{registry}
        , Totals_{
            .PingOk = MakeRate("membership.pingOk"),
            .PingFail = MakeRate("membership.pingFail"),
            .ForwardOk = MakeRate("membership.forwardOk"),
            .ForwardFail = MakeRate("membership.forwardFail"),
        }
    {
    }

    void PingOk() const {
        Totals_.PingOk->Inc();
    }

    void PingFail() const {
        Totals_.PingFail->Inc();
    }

    void ForwardOk() const {
        Totals_.ForwardOk->Inc();
    }

    void ForwardFail() const {
        Totals_.ForwardFail->Inc();
    }

private:
    // Labels of a metric: {component=<cluster name>, sensor=<metric name>}
    [[nodiscard]]
    TLabels CreateLabels(TStringBuf metricName) const {
        TLabels result;

        result.Add("component", ClusterName_);
        result.Add("sensor", metricName);

        return result;
    }

    // Registers (or finds) a rate with the given sensor name in the registry
    TRate* MakeRate(TStringBuf metricName) {
        return Registry_.Rate(CreateLabels(metricName));
    }

private:
    TString ClusterName_;
    TMetricRegistry& Registry_;
    TNodeMetrics Totals_;
};


class TPingReqCtx: public IRequestCtx {
public:
    TPingReqCtx(
            IClusterMembershipSingleHostClient* client,
            TNodeEndpoint target,
            ui32 localIncNum)
        : Client_{std::move(client)}
        , Target_{std::move(target)}
        , IncarnationNumber_{localIncNum}
    {
        What_ = TStringBuilder() << "[ping-req to " << Target_ << "]";
    }

private:
    NThreading::TFuture<void> PerformRequest() override {
        ++TimesPerformed_;

        auto request = Client_->Ping(Target_, {});
        return HandleResult(request);
    }

    NThreading::TFuture<void> HandleResult(const TAsyncAckResponse& f) {
        return f
                .Subscribe([this, reqCtxPtr{shared_from_this()}](TAsyncAckResponse f) {
                    try {
                        Result_ = std::move(f.ExtractValueSync());
                        ShouldRetry_ = Result_->Fail();
                    } catch (...) {
                        Result_ = std::make_unique<TAckResponseOrError>(
                                ::NGrpc::TGrpcStatus{CurrentExceptionMessage(), grpc::StatusCode::INTERNAL, true});

                        ShouldRetry_ = false;
                    }
                })
                .IgnoreResult();
    }

    bool ShouldRetry() const override {
        return ShouldRetry_;
    }

    TString What() const override {
        return What_;
    }

    size_t TimesPerformed() const override {
        return TimesPerformed_;
    }

    std::unique_ptr<IEventBase> MakeReplyEvent() override {
        return std::make_unique<TPrivateEvents::TOnOnePingReqResponse>(std::move(Result_), Target_);
    }

    std::unique_ptr<IEventBase> MakeCompletionEvent() override {
        return std::make_unique<TPrivateEvents::TOnPingReqCompleted>(std::move(Target_), IncarnationNumber_);
    }

private:
    IClusterMembershipSingleHostClient* Client_;
    TNodeEndpoint Target_;
    ui32 IncarnationNumber_;

    TAckResponseOrErrorPtr Result_;
    TString What_;
    size_t TimesPerformed_{0};
    bool ShouldRetry_{false};
};

class TClusterMembershipActor: public TActorBootstrapped<TClusterMembershipActor>, private TPrivateEvents {
public:
    /**
     * Reads protocol parameters from the config (a zero value means "use the built-in default"),
     * creates a gRPC client and registers all non-local nodes of the cluster map, if one is given.
     *
     * Throws (Y_ENSURE) if the protocol period is shorter than three ping timeouts.
     */
    TClusterMembershipActor(
            TString clusterName,
            ClusterMembershipConfig&& clusterMembershipConfig,
            ui16 port,
            NMonitoring::TMetricRegistry& registry,
            const IClusterMapPtr& cluster,
            TString clientId,
            EMembershipMode mode)
        : Config_{std::move(clusterMembershipConfig)}
        , PortString_{ToString(port)}
        , LocalAddress_{FQDNHostName() + ":" + PortString_}
        , Registry_{registry}
        , Metrics_{std::move(clusterName), Registry_}
        , Mode_{mode}
        , LogPrefix_{TStringBuilder() << "[" << LocalAddress_ << "] "}
    {
        const auto valueOr = [](auto configured, auto fallback) {
            return configured ? configured : fallback;
        };

        // constants defined in the protocol
        // "Note that the protocol period T' has to be at least three times the round-trip estimate"
        ProtocolPeriod_ = TDuration::Seconds(valueOr(Config_.protocol_period_seconds(), 15));
        SuspicionTimeoutPeriods_ = valueOr(Config_.suspicion_timeout_periods(), 3);
        // "Determined by the message round-trip time, which is chosen smaller than the protocol period"
        PingTimeout_ = TDuration::Seconds(valueOr(Config_.ping_timeout_seconds(), 3));
        // Defined as t = λ * log(n) (not log2, but log_e)
        PiggybackPeriods_ = valueOr(Config_.piggyback_periods(), 3);
        // defined as k
        SubgroupSize_ = valueOr(Config_.subgroup_size(), 3);
        CheckDeadNodesEveryNEpochs_ = valueOr(Config_.check_dead_nodes_every_n_epochs(), 5);
        RevivingPeriods_ = valueOr(Config_.reviving_periods(), 5);

        Y_ENSURE(
                ProtocolPeriod_.Seconds() >= 3 * PingTimeout_.Seconds(),
                "the protocol period T' has to be at least three times the round-trip estimate");

        TGrpcClientConfig grpcConfig;
        auto* readTimeout = grpcConfig.mutable_readtimeout();
        readTimeout->set_unit(yandex::solomon::config::TimeUnit::SECONDS);
        readTimeout->set_value(PingTimeout_.Seconds());
        // TODO(ivanzhukov): set_workerthreads or an external thread pool

        GossipClusterClient_ = CreateClusterMembershipGrpcClient(grpcConfig, Registry_, std::move(clientId));

        // TODO: get values from the config
        RequestExecutorOptions_ = TRequestsExecutorOptions()
                .SetMaxRetries(3)
                .SetBackoff(TDuration::Seconds(5))
                .SetScatteringInterval(PingTimeout_);

        if (cluster) {
            AddNodes(*cluster);
        }
    }

// CM - Cluster Membership
// Logging shortcuts for the ClusterMembership component. Every message is prefixed with LogPrefix_,
// so the macros are usable only where that member is accessible. They are #undef'ed after the class.
#define CM_TRACE(...) MON_TRACE(ClusterMembership, LogPrefix_ << __VA_ARGS__)
#define CM_DEBUG(...) MON_DEBUG(ClusterMembership, LogPrefix_ << __VA_ARGS__)
#define CM_INFO(...)  MON_INFO(ClusterMembership,  LogPrefix_ << __VA_ARGS__)
#define CM_WARN(...)  MON_WARN(ClusterMembership,  LogPrefix_ << __VA_ARGS__)
#define CM_ERROR(...) MON_ERROR(ClusterMembership, LogPrefix_ << __VA_ARGS__)
#define CM_CRIT(...)  MON_CRIT(ClusterMembership,  LogPrefix_ << __VA_ARGS__)

    // Switches to the main state; only an Active member starts probing other nodes
    void Bootstrap() {
        Become(&TThis::Main);

        if (Mode_ != EMembershipMode::Active) {
            return;
        }

        OnProbe();
    }

private:
    // The only state of the actor: dispatches events to handlers
    STATEFN(Main) {
        switch (ev->GetTypeRewrite()) {
            // public events
            hFunc(TGossipEvents::TSubscribe, OnSubscribe);
            hFunc(TGossipEvents::TAddNode, OnAddNode);
            hFunc(TGossipEvents::TPing, OnPing);

            // private events
            sFunc(TPrivateEvents::TOnProbe, OnProbe);
            hFunc(TPrivateEvents::TOnPingCallback, OnPingCallback);
            hFunc(TPrivateEvents::TOnForwardPingCallback, OnForwardPingCallback);
            hFunc(TPrivateEvents::TOnOnePingReqResponse, OnOnePingReqResponse);
            hFunc(TPrivateEvents::TOnPingReqCompleted, OnPingReqCompleted);
            hFunc(TPrivateEvents::TOnGossips, OnGossips);
        }
    }

    // Remembers an actor that wants to receive the cluster state; subscribing twice is an error
    void OnSubscribe(TGossipEvents::TSubscribe::TPtr& ev) {
        const auto subscriberId = ev->Get()->ActorId;
        const bool isNew = Subscribers_.emplace(subscriberId).second;

        Y_ENSURE(isNew, subscriberId << " is trying to subscribe multiple times");
    }

    void OnProbe() {
        // TODO(ivanzhukov@): process all updates as soon as we've sent requests to every node in a cluster (and not before!)
        ApplyAllUpdates();
        ProcessFaultyNodes();
        ++CurrentEpoch_;
        SendClusterStateToSubscribers();

        // TODO(ivanzhukov@): should time be aligned to a grid?
        // TODO(ivanzhukov@): what if every node creates a big load of requests at the same time with others?
        Schedule(ProtocolPeriod_, new TPrivateEvents::TOnProbe);

        for (const auto& address: GossipClusterClient_->Addresses()) {
            auto gossipClient = GossipClusterClient_->Get(address);
            auto nodeIt = MembershipCluster_.find(address);
            // TODO(ivanzhukov): add clients dynamically. after that get rid of that if
            if (nodeIt == MembershipCluster_.end()) {
                auto& add = address;
                CM_WARN("no client address \"" << add << "\" found in MembershipCluster_");

                continue;
            }

            auto& node = nodeIt->second;

            bool shouldWeCheckDeadNodes = CurrentEpoch_ % CheckDeadNodesEveryNEpochs_ == 0;
            bool isNodeDead = ENodeState::Dead == node.State && !node.IsResurgent && !shouldWeCheckDeadNodes;
            bool isAlreadyPinging = PendingPings_.contains(address);

            if (isNodeDead || isAlreadyPinging) {
                continue;
            }

            PendingPings_.emplace(address);

            const auto& add = address; // dirty fix. see https://clck.ru/N2DHm
            CM_DEBUG("probing " << add);

            auto localIncNum = node.IncarnationNumber;
            auto* actorSystem = TActorContext::ActorSystem();

            gossipClient->Ping(TString{address}, GetDissemination())
                    .Subscribe([target{TString{address}}, localIncNum, self{SelfId()}, actorSystem](auto fut) mutable {
                        actorSystem->Send(
                                self,
                                new TPrivateEvents::TOnPingCallback{
                                        std::move(target),
                                        localIncNum,
                                        std::move(fut.ExtractValue())});
                    });
        }
    }

    /**
     * Result of a direct ping. A transport failure leads either to a ping-req or, if ping-reqs are disabled
     * in the config, straight to a suspicion. A successful ack makes the node Alive.
     */
    void OnPingCallback(const TPrivateEvents::TOnPingCallback::TPtr& evPtr) {
        auto& ev = *(evPtr->Get());
        const auto& targetAddress = ev.TargetAddress;
        const auto localIncNum = ev.LocalIncarnationNumber;
        const auto& response = ev.AckOrError;

        if (!response->Success()) {
            Metrics_.PingFail();
            CM_WARN("error on probing " << targetAddress << ": " << response->Error().Msg);

            if (!Config_.dont_send_ping_req()) {
                // the ping stays pending until the ping-req is completed
                CM_WARN("trying to send a ping-req(" << targetAddress << ")");
                SendPingReq(targetAddress);
                return;
            }

            CM_ERROR(TStringBuf("got no ack for req(") << targetAddress << "). Skipping a ping-req");

            PendingPings_.erase(targetAddress);
            AddUpdate(targetAddress, TMembershipUpdate{LocalAddress_, localIncNum, ENodeState::Suspicious});
            return;
        }

        PendingPings_.erase(targetAddress);

        auto ack = response->Value();

        if (ack.Gossips && !ack.Gossips->empty()) {
            Send(SelfId(), new TPrivateEvents::TOnGossips{std::move(ack.Gossips)});
        }

        if (!ack.Success) {
            // for now, a node cannot send a failed ack for itself
            CM_WARN("node " << targetAddress << " sent a failed ack for itself");
            Metrics_.PingFail();
            return;
        }

        CM_DEBUG(
            "got an ack from " << targetAddress
            << ": {address: " << ack.Address << ","
            << " incarnation_number: " << ack.IncarnationNumber << "}");

        const auto freshestIncNum = Max(localIncNum, ack.IncarnationNumber);
        AddUpdate(targetAddress, TMembershipUpdate{LocalAddress_, freshestIncNum, ENodeState::Alive});

        Metrics_.PingOk();
    }

    // An incoming ping: answers for the local node, forwards a ping for any other address
    void OnPing(const TGossipEvents::TPing::TPtr& ev) {
        const auto address = ev->Get()->Address;
        const auto& incomingGossips = ev->Get()->Gossips;

        if (address != LocalAddress_) {
            ForwardPing(address, ev->Sender);
        } else {
            TGossipEvents::TAck::TAckResponse ownAck{
                    LocalAddress_,
                    LocalNode_.IncarnationNumber,
                    true,
                    GetDissemination()};

            Send(ev->Sender, new TGossipEvents::TAck{std::move(ownAck)});
        }

        const bool hasGossips = incomingGossips && !incomingGossips->empty();
        if (hasGossips) {
            ProcessGossips(*incomingGossips);
        }
    }

    // A request to start tracking one more node
    void OnAddNode(const TGossipEvents::TAddNode::TPtr& evPtr) {
        AddNode(evPtr->Get()->Address);
    }

    /**
     * Starts tracking a node: creates its update list, its membership record and a client for it.
     * The local address and already known addresses are ignored.
     *
     * @param address endpoint of the node, must not be empty (otherwise Y_ENSURE throws)
     */
    void AddNode(const TString& address) {
        Y_ENSURE(!address.empty(), "address cannot be empty");

        if (address == LocalAddress_) {
            return;
        }

        auto [_, isNew] = UpdateList_.emplace(address, TVector<TMembershipUpdate>{});
        if (!isNew) {
            return;
        }

        // logged only after the duplicate check, so that the message is not emitted for known nodes
        CM_INFO("adding a new node \"" << address << "\"");

        MembershipCluster_[address] = {};
        GossipClusterClient_->Add(address);
    }

    // Registers every node of the cluster map except the local one; an empty map is an error
    void AddNodes(const IClusterMap& cluster) {
        Y_ENSURE(cluster.Size() > 0, "cluster cannot be empty");

        for (const auto& member: cluster.Nodes()) {
            const bool isLocal = (member.NodeId == cluster.Local().NodeId);
            if (!isLocal) {
                AddNode(member.Endpoint);
            }
        }
    }

    /**
     * Pings `address` on behalf of `replyTo`. A node that we consider Dead is not pinged:
     * a failed ack is sent back right away. An unknown address is registered first.
     */
    void ForwardPing(TString address, const TActorId& replyTo) {
        CM_DEBUG("forwarding ping(" << address << ")");

        auto knownIt = MembershipCluster_.find(address);
        if (knownIt == MembershipCluster_.end()) {
            AddNode(address);
            knownIt = MembershipCluster_.find(address);
        }

        const auto& targetInfo = knownIt->second;

        if (ENodeState::Dead == targetInfo.State) {
            TGossipEvents::TAck::TAckResponse negativeAck{
                    address,
                    targetInfo.IncarnationNumber,
                    false,
                    GetDissemination()};

            Send(replyTo, new TGossipEvents::TAck{std::move(negativeAck)});
            return;
        }

        // Alive, Suspicious and Unknown nodes are pinged
        auto client = GossipClusterClient_->Get(address);
        auto* actorSystem = TActorContext::ActorSystem();
        const auto localIncNum = targetInfo.IncarnationNumber;

        // the result is delivered back to the actor as an event
        auto onAck = [self{SelfId()}, actorSystem, address, localIncNum, replyTo](auto respFuture) mutable {
            actorSystem->Send(
                    self,
                    new TPrivateEvents::TOnForwardPingCallback{
                            std::move(address),
                            localIncNum,
                            std::move(respFuture.ExtractValue()),
                            replyTo});
        };

        // TODO(ivanzhukov@): ping timeout, backoff and retries
        client->Ping(address, GetDissemination()).Subscribe(std::move(onAck));
    }

    // Result of a forwarded ping: relays the outcome (and our gossips) to the node that asked for it
    void OnForwardPingCallback(const TPrivateEvents::TOnForwardPingCallback::TPtr& evPtr) {
        auto& ev = *(evPtr->Get());
        const auto& address = ev.TargetAddress;
        auto& response = ev.AckOrError;

        TGossipEvents::TAck::TAckResponse relayedAck;
        relayedAck.Address = address;
        relayedAck.Gossips = GetDissemination();

        if (response->Success()) {
            auto& ack = response->Value();

            if (ack.Gossips && !ack.Gossips->empty()) {
                Send(SelfId(), new TPrivateEvents::TOnGossips{std::move(ack.Gossips)});
            }

            relayedAck.IncarnationNumber = ack.IncarnationNumber;
            relayedAck.Success = ack.Success;
        } else {
            CM_ERROR(
                "failed to get an ack for the forwarded ping(" << address << ")"
                << ": " << response->Error().Msg);

            // nothing was received, so the incarnation number known at the time of the ping is reported
            relayedAck.IncarnationNumber = ev.LocalIncarnationNumber;
            relayedAck.Success = false;
        }

        Send(ev.Sender, new TGossipEvents::TAck{std::move(relayedAck)});
    }

    // Stores one response to a ping-req until the whole ping-req about this target is completed
    void OnOnePingReqResponse(const TPrivateEvents::TOnOnePingReqResponse::TPtr& evPtr) {
        auto& ev = *(evPtr->Get());

        // operator[] creates an empty list for the first response about the target
        auto& collected = ForwardedAcks_[ev.Target];
        collected.emplace_back(std::move(ev.Response));
    }

    /**
     * A ping-req is over: goes through the collected responses, and marks the target as Alive
     * if at least one neighbor got a successful ack from it, or as Suspicious otherwise.
     */
    void OnPingReqCompleted(const TPrivateEvents::TOnPingReqCompleted::TPtr& evPtr) {
        const auto& ev = *(evPtr->Get());
        const auto& targetAddress = ev.Target;
        const auto localIncNum = ev.IncarnationNumber;

        bool gotPositiveAck = false;
        ui32 maxReceivedIncNum{0};
        ui32 failures = 0;
        TString errMsg;

        for (const auto& responsePtr: ForwardedAcks_[targetAddress]) {
            const auto& response = *responsePtr;

            if (!response.Success()) {
                if (0 == failures) {
                    errMsg += ": ";
                }

                ++failures;

                // only the first few errors get into the log message
                if (failures <= 5) {
                    errMsg += TStringBuilder() << "(error#" << failures << "): " << response.Error().Msg;
                    errMsg += ". ";
                }

                continue;
            }

            auto ack = response.Value();

            if (ack.Success) {
                gotPositiveAck = true;
                maxReceivedIncNum = Max(maxReceivedIncNum, ack.IncarnationNumber);
            }

            if (ack.Gossips && !ack.Gossips->empty()) {
                ProcessGossips(*ack.Gossips);
            }
        }

        if (gotPositiveAck) {
            const auto incNum = Max(localIncNum, maxReceivedIncNum);

            CM_DEBUG(
                "got ack:"
                << " {address: " << targetAddress << ","
                << " incarnation_number: " << incNum << "}");

            Metrics_.ForwardOk();
            AddUpdate(targetAddress, TMembershipUpdate{LocalAddress_, incNum, ENodeState::Alive});
        } else {
            Metrics_.ForwardFail();
            AddUpdate(targetAddress, TMembershipUpdate{LocalAddress_, localIncNum, ENodeState::Suspicious});

            CM_ERROR(TStringBuf("got no ack for ping-req(") << targetAddress << ")" << errMsg);
        }

        PendingPings_.erase(targetAddress);
        ForwardedAcks_.erase(targetAddress);
    }

    // Queues the update of every non-null gossip for its target node
    void ProcessGossips(const NClusterMembership::TGossips& gossips) {
        for (const auto& gossip: gossips) {
            if (!gossip) {
                continue;
            }

            AddUpdate(gossip->Target, std::move(gossip->Update));
        }
    }

    // Gossips that came together with an ack
    void OnGossips(const TPrivateEvents::TOnGossips::TPtr& evPtr) {
        const auto& received = evPtr->Get()->Gossips;
        ProcessGossips(*received);
    }

    /**
     * Send a ping(target) request to a random subgroup of not-Dead neighbors so that they can ask target
     * and forward responses to us.
     *
     * The caller leaves target in PendingPings_, expecting TOnPingReqCompleted to release it. If the request
     * cannot be started at all, nobody will ever send that event, so the pending ping is released here --
     * otherwise OnProbe() would skip the target forever:
     *   - unknown target: nothing else to do;
     *   - no neighbors to ask: the failed direct ping is the only evidence we have, so the target is suspected.
     *
     * @param target a node that we want to collect info about
     */
    void SendPingReq(TString target) {
        const auto nodeIt = MembershipCluster_.find(target);
        if (nodeIt == MembershipCluster_.end()) {
            CM_DEBUG("unknown node: \"" << target << "\". cannot send a ping-req");

            PendingPings_.erase(target);
            return;
        }

        const auto localIncNum = nodeIt->second.IncarnationNumber;

        TVector<TNodeEndpoint> aliveNodes(::Reserve(MembershipCluster_.size()));
        for (const auto& [endpoint, info]: MembershipCluster_) {
            // TODO: optimization -- collect Alive nodes first, only then Suspicious nodes
            if (endpoint != target && info.State != ENodeState::Dead) {
                aliveNodes.emplace_back(endpoint);
            }
        }

        const size_t subgroupSize = Min(static_cast<size_t>(SubgroupSize_), aliveNodes.size());
        if (subgroupSize == 0) {
            CM_WARN("no nodes to send a ping-req(" << target << ") to. Suspecting the node");

            PendingPings_.erase(target);
            AddUpdate(target, TMembershipUpdate{LocalAddress_, localIncNum, ENodeState::Suspicious});
            return;
        }

        THashSet<TNodeEndpoint> subgroup;
        subgroup.reserve(subgroupSize);

        if (subgroupSize == aliveNodes.size()) {
            // shortcut
            for (const auto& endpoint: aliveNodes) {
                subgroup.emplace(endpoint);
            }
        } else {
            // duplicates are rejected by the set, so we draw until there are enough distinct nodes
            while (subgroup.size() != subgroupSize) {
                auto idx = RandomNumber(aliveNodes.size());
                subgroup.emplace(aliveNodes[idx]);
            }
        }

        TVector<IRequestCtxPtr> requests;
        requests.reserve(subgroup.size());

        for (const auto& endpoint: subgroup) {
            auto client = GossipClusterClient_->Get(endpoint);

            requests.emplace_back(new TPingReqCtx{client, target, localIncNum});
        }

        Register(CreateRequestsExecutorActor(RequestExecutorOptions_, std::move(requests), SelfId()).Release());
    }

    /**
     * Queues an update about a node; queued updates are applied in ApplyAllUpdates().
     *
     * Updates about unknown nodes are dropped with a warning. The local node is an exception: it is never
     * registered through AddNode(), so its update list is created here on demand. Without that every gossip
     * about ourselves would be dropped and ApplyAllUpdates() would never get a chance to refute a suspicion.
     *
     * @param address endpoint of the node the update is about
     * @param update  the update itself, it is moved into the queue
     */
    void AddUpdate(TString address, TMembershipUpdate&& update) {
        auto it = UpdateList_.find(address);
        if (it == UpdateList_.end()) {
            if (address != LocalAddress_) {
                CM_WARN("unknown node: \"" << address << "\". cannot add an update");
                return;
            }

            it = UpdateList_.emplace(address, TVector<TMembershipUpdate>{}).first;
        }

        if (update.Supervisor != LocalAddress_) {
            CM_DEBUG(
                "got a gossip: {"
                << "supervisor: " << update.Supervisor
                << ", target: " << address
                << ", " << update.State << "#" << update.IncarnationNumber
                << "}");
        }

        it->second.emplace_back(std::move(update));
    }

    // Keeps only the most relevant update (in terms of CompareUpdates) as the single element of the vector
    static void LeaveOnlyOneRelevantUpdate(TVector<TMembershipUpdate>& updates) {
        if (updates.size() < 2) {
            return;
        }

        // the most relevant update becomes the first one, everything after it is dropped
        Sort(updates.begin(), updates.end(), CompareUpdates);
        updates.erase(updates.begin() + 1, updates.end());
        // TODO(ivanzhukov@): how often to shrink?

        Y_VERIFY_DEBUG(updates.size() == 1, "wrong size after processing");
    }

    /**
     * Takes up to DISSEMINATION_MSGS_LIMIT gossips from the front of the queue to piggyback them on a message.
     * A taken gossip returns to the queue unless it has been piggybacked PiggybackPeriods_ times.
     *
     * @return the batch, or a null pointer if the queue is empty
     */
    TAtomicSharedPtr<TGossips> GetDissemination() {
        if (DisseminationMessages_.empty()) {
            return {};
        }

        TAtomicSharedPtr<TGossips> batch = new TGossips{};

        while (!DisseminationMessages_.empty() && batch->size() < DISSEMINATION_MSGS_LIMIT) {
            // a gossip is taken out of the set, because its counter (the sorting key) is about to change
            auto node = DisseminationMessages_.extract(DisseminationMessages_.begin());
            batch->emplace_back(node.value());
        }

        for (auto& gossip: *batch) {
            ++gossip->PiggybackCnt;

            if (gossip->PiggybackCnt < PiggybackPeriods_) {
                DisseminationMessages_.insert(DisseminationMessages_.begin(), gossip);
            }
        }

        return batch;
    }

    void SendClusterStateToSubscribers() const {
        auto clusterStatePtr = std::make_shared<decltype(MembershipCluster_)>(MembershipCluster_);

        for (auto& subId: Subscribers_) {
            Send(subId, new TGossipEvents::TClusterStateResponse{clusterStatePtr});
        }
    }

    // TODO(ivanzhukov@): add selfmon metrics
    /**
     * Applies updates collected since the previous call. For every node only the most relevant update
     * (see CompareUpdates) is used, after that the node's update list is cleared.
     *
     *  - An update about the local node matters only if we are suspected: the suspicion is refuted
     *    by an Alive gossip with an incarnation number greater than the suspected one.
     *  - A Dead node is not affected by other nodes' gossips. Our own observations do not change its state
     *    either, they only set or reset the IsResurgent flag.
     *  - Any other node takes the state of the update. A state change caused by our own observation
     *    is put into the dissemination queue.
     *
     * A node without a membership record is skipped with a warning and does not prevent other nodes
     * from being processed.
     */
    void ApplyAllUpdates() {
        // TODO(ivanzhukov@):
        //  1. Wait for all current async reqs
        for (auto& [address, updates]: UpdateList_) {
            if (updates.empty()) {
                continue;
            }

            Y_SCOPE_EXIT(upds{&updates}) {
                upds->clear();
            };

            LeaveOnlyOneRelevantUpdate(updates);
            const auto& update = updates[0];

            if (address == LocalAddress_) {
                if (ENodeState::Suspicious == update.State) {
                    if (update.IncarnationNumber >= LocalNode_.IncarnationNumber) {
                        LocalNode_.IncarnationNumber = update.IncarnationNumber + 1;
                        LocalNode_.UpdatedAtEpoch = CurrentEpoch_;
                    }

                    DisseminationMessages_.emplace(
                            new TGossip{
                                    LocalAddress_,
                                    LocalAddress_,
                                    LocalNode_.IncarnationNumber,
                                    ENodeState::Alive});

                    CM_DEBUG("we've been suspected. sending an Alive#" << LocalNode_.IncarnationNumber << " msg");
                }

                continue;
            }

            auto nodeIt = MembershipCluster_.find(address);
            if (nodeIt == MembershipCluster_.end()) {
                auto& add = address;
                CM_WARN("no address \"" << add << "\" found in MembershipCluster_. cannot apply an update");

                // one inconsistent record must not block updates of all other nodes
                continue;
            }

            auto& node = nodeIt->second;

            if (ENodeState::Dead == node.State && update.Supervisor != LocalAddress_) {
                continue;
            }

            // remembered for the log message below, which shows the state before and after the update
            const auto prevIncNum = node.IncarnationNumber;
            node.IncarnationNumber = Max(node.IncarnationNumber, update.IncarnationNumber);
            ENodeState resultingState;

            if (node.State != ENodeState::Dead) {
                resultingState = update.State;
            } else {
                resultingState = ENodeState::Dead;

                if (!node.IsResurgent && update.State == ENodeState::Alive) {
                    // it was dead, but started to respond
                    node.IsResurgent = true;
                    node.UpdatedAtEpoch = CurrentEpoch_;
                } else if (node.IsResurgent && update.State != ENodeState::Alive) {
                    // it was acting alive first, but then failed -- don't hesitate and mark it as dead
                    node.IsResurgent = false;
                    node.UpdatedAtEpoch = CurrentEpoch_;
                }
            }

            TString resultingStateString = ToString(resultingState);
            if (node.IsResurgent) {
                resultingStateString += "{resurgent}";
            }

            auto& add = address; // dirty fix. see https://clck.ru/N2DHm
            CM_DEBUG("update for " << add << ":"
                             << " {" << node.State << "#" << prevIncNum << "}"
                             << " + {" << update.State << "#" << update.IncarnationNumber << "}"
                             << " -> {" << resultingStateString << "#" << node.IncarnationNumber << "}");

            if (node.State != resultingState) {
                node.UpdatedAtEpoch = CurrentEpoch_;
                node.State = resultingState;

                if (update.Supervisor == LocalAddress_) {
                    // Tell other nodes about this update
                    DisseminationMessages_.emplace(
                            new TGossip{
                                    address,
                                    LocalAddress_,
                                    node.IncarnationNumber,
                                    resultingState});
                }
            }
        }
    }

    // Moves a node into the given state as of the current epoch and queues a gossip about it
    void MarkNodeAs(const TNodeEndpoint& address, TNodeInfo& node, ENodeState state) {
        const auto incNum = node.IncarnationNumber;

        CM_DEBUG("update for " << address << ":"
                         << " {" << node.State << "#" << incNum << "}"
                         << " ->"
                         << " {" << state << "#" << incNum << "}");

        node.State = state;
        node.UpdatedAtEpoch = CurrentEpoch_;

        // we are the supervisor of this update
        DisseminationMessages_.emplace(new TGossip{address, LocalAddress_, incNum, state});
    }

    /**
     * Handles states that change with time:
     *  - a node that stays Suspicious for more than SuspicionTimeoutPeriods_ epochs becomes Dead;
     *  - a resurgent node that keeps the flag for more than RevivingPeriods_ epochs becomes Alive.
     *
     * The IsResurgent flag is dropped together with the promotion to Alive. A stale flag would make
     * the node be "revived" (and gossiped about) again every RevivingPeriods_ epochs, and could turn
     * a later Suspicious or Dead state of the node into Alive without any ack from it.
     */
    void ProcessFaultyNodes() {
        for (auto& [address, node]: MembershipCluster_) {
            auto& nd = node; // dirty fix. see https://clck.ru/N2DHm
            auto longerThan = [&](ui16 numOfPeriods) {
                return CurrentEpoch_ - nd.UpdatedAtEpoch > numOfPeriods;
            };

            if (node.State == ENodeState::Suspicious && longerThan(SuspicionTimeoutPeriods_)) {
                MarkNodeAs(address, node, ENodeState::Dead);
            } else if (node.IsResurgent && longerThan(RevivingPeriods_)) {
                node.IsResurgent = false;
                MarkNodeAs(address, node, ENodeState::Alive);
            }
        }
    }

private:
    // Info about this node; its IncarnationNumber is reported in acks and is bumped to refute a suspicion
    TNodeInfo LocalNode_;
    // Records of all other known nodes by address; a copy is sent to subscribers on every probe
    TClusterMembershipState MembershipCluster_;
    // Updates collected per node since the last ApplyAllUpdates()
    THashMap<TString, TVector<TMembershipUpdate>> UpdateList_;
    // TODO(ivanzhukov@): add shards load, host load
    // TODO(ivanzhukov@): add a metric for DisseminationMessages_.size()
    // Gossips waiting to be piggybacked on outgoing messages, ordered by TGossipCmp
    TSet<TGossipPtr, TGossipCmp> DisseminationMessages_;

    ClusterMembershipConfig Config_;
    TString PortString_;
    // FQDN of the host plus the port, "host:port"
    TString LocalAddress_;
    NMonitoring::TMetricRegistry& Registry_;
    TMetrics Metrics_;
    // Only an Active member starts probing in Bootstrap()
    EMembershipMode Mode_;
    IClusterMembershipClientPtr GossipClusterClient_;
    // "[<local address>] ", prepended to every log message by the CM_* macros
    TString LogPrefix_;
    // Number of protocol periods started so far, incremented in OnProbe()
    ui32 CurrentEpoch_{0};
    TSet<TActorId> Subscribers_;
    TRequestsExecutorOptions RequestExecutorOptions_;
    // Nodes with a ping or a ping-req in flight; OnProbe() does not ping them again
    THashSet<TNodeEndpoint> PendingPings_;
    // Responses to a ping-req by target, kept until the ping-req is completed
    THashMap<TNodeEndpoint, TVector<TAckResponseOrErrorPtr>> ForwardedAcks_;

    // constants defined in the protocol
    // (values come from the config, zero values are replaced with defaults in the constructor)
    TDuration ProtocolPeriod_;
    ui16 SuspicionTimeoutPeriods_;
    TDuration PingTimeout_;
    ui16 PiggybackPeriods_;
    ui16 SubgroupSize_;
    ui16 CheckDeadNodesEveryNEpochs_;
    ui16 RevivingPeriods_;
};

#undef CM_TRACE
#undef CM_DEBUG
#undef CM_INFO
#undef CM_WARN
#undef CM_ERROR
#undef CM_CRIT

} // namespace

/**
 * Creates the cluster membership actor; the implementation class itself is private to this file.
 * All arguments are passed to the actor's constructor as is.
 */
THolder<IActor> CreateClusterMembershipActor(
        TString clusterName,
        ClusterMembershipConfig clusterMembershipConfig,
        ui16 port,
        NMonitoring::TMetricRegistry& registry,
        IClusterMapPtr cluster,  // NOLINT(performance-unnecessary-value-param): false positive
        TString clientId,
        EMembershipMode mode)
{
    using TImpl = TClusterMembershipActor;

    return MakeHolder<TImpl>(
            std::move(clusterName),
            std::move(clusterMembershipConfig),
            port,
            registry,
            std::move(cluster),
            std::move(clientId),
            mode);
}

} // namespace NSolomon::NClusterMembership
