#include "events.h"
#include "requester.h"
#include "shard_actor.h"

#include <solomon/services/dataproxy/lib/hash/metabase_request_hash.h>
#include <solomon/services/dataproxy/lib/message_cache/cache_actor.h>

#include <solomon/libs/cpp/actors/scheduler/scheduler.h>
#include <solomon/libs/cpp/http/server/handlers/metrics.h>
#include <solomon/libs/cpp/logging/logging.h>
#include <solomon/libs/cpp/proto_convert/metric_type.h>
#include <solomon/libs/cpp/trace/trace.h>

#include <solomon/protos/metabase/status_code.grpc.pb.h>
#include <solomon/protos/metabase/grpc_find.pb.h>

#include <library/cpp/actors/core/actor_bootstrapped.h>
#include <library/cpp/actors/core/hfunc.h>
#include <library/cpp/containers/absl_flat_hash/flat_hash_map.h>
#include <library/cpp/containers/absl_flat_hash/flat_hash_set.h>

#include <util/generic/scope.h>
#include <util/generic/size_literals.h>
#include <util/string/hex.h>

#include <queue>

namespace NSolomon::NDataProxy {

using namespace NActors;
using namespace NTracing;
using namespace yandex::solomon::metabase;

namespace {

/**
 * Note: these metrics are shared across multiple shards within one project!
 */
class TMetrics: public NMonitoring::IMetricSupplier {
public:
    TMetrics(TStringBuf replica, TStringBuf project)
        : Registry{{{"projectId", project}, {"replica", replica}}}
    {
        Inflight = Registry.IntGauge({{"sensor", "metabase.inflight"}});
        Postponed = Registry.IntGauge({{"sensor", "metabase.postponed"}});
        Subscribers = Registry.IntGauge({{"sensor", "metabase.subscribers"}});
    }

private:
    void Accept(TInstant time, NMonitoring::IMetricConsumer* consumer) const override {
        Registry.Accept(time, consumer);
    }

    void Append(TInstant time, NMonitoring::IMetricConsumer* consumer) const override {
        Registry.Append(time, consumer);
    }

public:
    NMonitoring::TMetricRegistry Registry;
    NMonitoring::IIntGauge* Inflight;
    NMonitoring::IIntGauge* Postponed;
    NMonitoring::IIntGauge* Subscribers;
};

/**
 * Kinds of metabase requests handled by the shard actor. The underlying value
 * is stored in the request context and passed to the cache as a message type.
 */
enum class ERequestTypes: ui8 {
    Find = 0,
    ResolveOne = 1,
    ResolveMany = 2,
    MetricNames = 3,
    LabelValues = 4,
    LabelNames = 5,
    UniqueLabels = 6,
};

/**
 * State of the cache lookup belonging to a request context.
 */
enum class ECacheReqState: ui8 {
    Absent = 0,   // no lookup is currently pending
    Inflight = 1, // a lookup was sent and its result has not arrived yet
};

/**
 * State of the metabase request belonging to a request context.
 */
enum class EMetabaseReqState: ui8 {
    New = 0,       // nothing was sent or queued yet
    Inflight = 1,  // the request is being sent / was sent to the metabase
    Postponed = 2, // the request waits in the queue because of the inflight limit
};

struct TSubscriberInfo {
    TActorId Id;
    ui64 Cookie;
    TInstant Deadline;
    TSpanId Span;

    TSubscriberInfo(TActorId id, ui64 cookie, TInstant deadline, TSpanId span)
        : Id(id)
        , Cookie(cookie)
        , Deadline(deadline)
        , Span(std::move(span))
    {}
};

// Request identifier. The shard actor fills it with the hash of the request
// message, so it may contain binary data (it is hex-encoded before logging).
using TReqId = TString;

class TRetryState {
public:
    void Retry() noexcept {
        NextDelay_ = Min(NextDelay_ * 2, TDuration::Seconds(10));
    }

    TDuration NextDelay() const noexcept {
        return NextDelay_;
    }

private:
    TDuration NextDelay_{TDuration::Seconds(2)};
};

using yandex::solomon::metabase::FindRequest;
using yandex::solomon::metabase::FindResponse;

/**
 * Collects a paginated Find result: produces the request for the next page
 * and merges every received page into a single response.
 */
class TFindResultMerger {
public:
    explicit TFindResultMerger(FindRequest req)
        : Request_{std::move(req)}
    {
    }

    /**
     * Request for the next page: the original request with the slice offset
     * set to the number of metrics received so far.
     */
    const FindRequest& NextRequest() {
        Request_.mutable_sliceoptions()->set_offset(Offset_);
        return Request_;
    }

    /** Merges one more page into the accumulated response. */
    void AddResponse(const FindResponse& resp) {
        TotalCount_ = resp.GetTotalCount();
        Offset_ += resp.metrics_size();
        // An empty page cannot move the offset forward: asking for the next page
        // would repeat the very same request over and over again. Treat such a
        // page as the end of the data even if the reported total was not reached.
        Exhausted_ = (resp.metrics_size() == 0);
        Response_.MergeFrom(resp);
    }

    /** True when there is nothing more to request. */
    bool IsReady() const {
        return Exhausted_ || Offset_ >= TotalCount_;
    }

    /**
     * Hands over the merged response. The merger must be ready, otherwise
     * Y_ENSURE throws.
     */
    FindResponse MergedResponse() && {
        Y_ENSURE(IsReady());
        return std::move(Response_);
    }

private:
    FindRequest Request_;
    FindResponse Response_;
    ui32 Offset_{0};
    ui32 TotalCount_{0};
    bool Exhausted_{false};
};

// probably could be reused in other actors (memstore, stockpile, etc.) with some minor changes
class TRequestsManager {
public:
    class TReqCtx: public std::enable_shared_from_this<TReqCtx> {
        friend class TRequestsManager;

    public:
        [[nodiscard]] bool Fresh() const {
            return CacheReqState == ECacheReqState::Absent && MetabaseReqState == EMetabaseReqState::New;
        }

    public:
        TReqId Id;
        ui64 RetryId{0}; // 0 means there is no scheduled request
        ECacheReqState CacheReqState{ECacheReqState::Absent};
        EMetabaseReqState MetabaseReqState{EMetabaseReqState::New};
        // XXX(ivanzhukov): limit the size?
        std::unique_ptr<IEventHandle> OriginalRequest;
        /**
         * Number of inflight requests sent to the cache actor and/or the metabase rpc
         */
        ui32 InFlight{0};
        int MessageType;
        TInstant ReceivedAt{};
        TRetryState RetryState;
        std::optional<TFindResultMerger> FindMerger;

    private:
        /**
         * Only TRequestsManager can access this field
         */
        TVector<TSubscriberInfo> ResponseSubscribers_;
    };

    struct TPostponedReqCompare {
        bool operator()(const TReqCtx* left, const TReqCtx* right) const {
            return left->ReceivedAt < right->ReceivedAt;
        }
    };

    TRequestsManager(TMetrics* metrics)
        : Metrics_{metrics}
    {
    }

    std::shared_ptr<TReqCtx> GetReqCtx(const TReqId& reqId) {
        auto it = ReqIdToReqCtx_.find(reqId);
        Y_VERIFY(it != ReqIdToReqCtx_.end(), "Context is missing for active request");
        return it->second;
    }

    std::shared_ptr<TReqCtx> GetOrCreateReqCtx(const TReqId& reqId, TInstant now = {}) {
        auto it = ReqIdToReqCtx_.find(reqId);

        if (it == ReqIdToReqCtx_.end()) {
            it = ReqIdToReqCtx_.emplace(reqId, std::make_shared<TReqCtx>()).first;
            it->second->ReceivedAt = now;
            it->second->Id = reqId;
        }

        return it->second;
    }

    void EraseReqCtx(TReqCtx& reqCtx) {
        if (auto retryId = reqCtx.RetryId) {
            ScheduledRetryIds_.erase(retryId);
        }

        auto postponedIt = PostponedReqs_.find(&reqCtx);
        while (postponedIt != PostponedReqs_.end() && (*postponedIt)->ReceivedAt == reqCtx.ReceivedAt) {
            if (&reqCtx == *postponedIt) {
                PostponedReqs_.erase(postponedIt);
                break;
            }

            ++postponedIt;
        }

        ReqIdToReqCtx_.erase(reqCtx.Id);
    }

    void AddRetryId(ui64 retryId) {
        ScheduledRetryIds_.emplace(retryId);
    }

    bool IsKnownRetry(ui64 retryId) {
        return ScheduledRetryIds_.contains(retryId);
    }

    const TVector<TSubscriberInfo>& GetSubscribers(const TReqCtx& reqCtx) const {
        return reqCtx.ResponseSubscribers_;
    }

    template<typename... Args>
    void AddSubscriber(TReqCtx& reqCtx, Args&&... args) {
        auto& sub = reqCtx.ResponseSubscribers_.emplace_back(std::forward<Args>(args)...);
        SubscriberToReqCtx_.emplace(sub.Id, &reqCtx);

        if (Metrics_) {
            Metrics_->Subscribers->Inc();
        }
    }

    void DeleteSubscribers(TReqCtx& reqCtx) {
        if (Metrics_) {
            i64 negSubs = -1ll * static_cast<i64>(reqCtx.ResponseSubscribers_.size());
            Metrics_->Subscribers->Add(negSubs);
        }

        for (const auto& sub: reqCtx.ResponseSubscribers_) {
            SubscriberToReqCtx_.erase(sub.Id);
        }

        reqCtx.ResponseSubscribers_.clear();
    }

    void DeleteSubscriber(TActorId subId) {
        auto it = SubscriberToReqCtx_.find(subId);
        if (it == SubscriberToReqCtx_.end()) {
            return;
        }

        if (Metrics_) {
            Metrics_->Subscribers->Dec();
        }

        std::erase_if(it->second->ResponseSubscribers_, [&](const auto& sub) {
            return sub.Id == subId;
        });
        SubscriberToReqCtx_.erase(it);
    }

    void DeleteSubscribersWhoTimedOut(TReqCtx& reqCtx, TInstant now) {
        i64 deletedCnt = 0;
        auto it = reqCtx.ResponseSubscribers_.begin();

        while (it != reqCtx.ResponseSubscribers_.end()) {
            if (it->Deadline <= now) {
                SubscriberToReqCtx_.erase(it->Id);
                it = reqCtx.ResponseSubscribers_.erase(it);
                ++deletedCnt;
            } else {
                ++it;
            }
        }

        if (Metrics_) {
            Metrics_->Subscribers->Add(-1l * deletedCnt);
        }
    }

    void AddPostponedRequest(TReqCtx* reqCtx) {
        PostponedReqs_.emplace(reqCtx);

        if (Metrics_) {
            Metrics_->Postponed->Inc();
        }
    }

    TReqCtx* ExtractPostponedRequestToProcess() {
        if (PostponedReqs_.empty()) {
            return nullptr;
        }

        TReqCtx* reqCtxPtr = *PostponedReqs_.begin();
        PostponedReqs_.erase(PostponedReqs_.begin());

        if (Metrics_) {
            Metrics_->Postponed->Dec();
        }

        return reqCtxPtr;
    }

    void IncInflight(TReqCtx& reqCtx) {
        ++reqCtx.InFlight;
        ++TotalInflight_;

        if (Metrics_) {
            Metrics_->Inflight->Inc();
        }
    }

    void DecInflight(TReqCtx& reqCtx) {
        --reqCtx.InFlight;
        --TotalInflight_;

        if (Metrics_) {
            Metrics_->Inflight->Dec();
        }
    }

    size_t GetTotalInflightCount() const {
        return TotalInflight_;
    }

private:
    TMetrics* Metrics_;
    absl::flat_hash_map<TReqId, std::shared_ptr<TReqCtx>> ReqIdToReqCtx_;
    absl::flat_hash_set<ui64> ScheduledRetryIds_;
    absl::flat_hash_map<TActorId, TReqCtx*, THash<TActorId>> SubscriberToReqCtx_;
    // TODO(ivanzhukov): limit the size of the queue
    // FIXME(ivanzhukov): use a priority queue instead of std::set (make_heap?)
    std::multiset<TReqCtx*, TPostponedReqCompare> PostponedReqs_;
    /**
     * Number of requests actually sent to Metabase (rather than scheduled)
     */
    ui64 TotalInflight_{0};
};

/**
 * Wraps a generic protobuf message into a response event of type TResp.
 * The message is cast (without a runtime check) to TResp::TProtoMsg, so it must
 * really be of that type.
 */
template <typename TResp>
std::unique_ptr<IEventBase> CreateResponseEvent(std::shared_ptr<const ::google::protobuf::Message> msg) {
    using TProto = typename TResp::TProtoMsg;

    auto event = std::make_unique<TResp>();
    event->Message = std::static_pointer_cast<const TProto>(msg);
    return event;
}

/**
 * Builds an error event out of the transport status, the metabase status and
 * a human readable message.
 */
std::unique_ptr<TMetabaseEvents::TError> CreateAnErrorEvent(
        grpc::StatusCode rpcCode,
        EMetabaseStatusCode metabaseCode,
        const TString& msg)
{
    auto error = std::make_unique<TMetabaseEvents::TError>();
    error->Message = msg;
    error->MetabaseCode = metabaseCode;
    error->RpcCode = rpcCode;
    return error;
}

/**
 * Compile-time mapping from a request event type to its ERequestTypes value.
 * Fails to compile for an event type that is not a supported request.
 */
template <typename TReq>
constexpr ERequestTypes MessageTypeFor() {
    using TEvents = TMetabaseEvents;

    if constexpr (std::is_same_v<TEvents::TFindReq, TReq>) {
        return ERequestTypes::Find;
    } else if constexpr (std::is_same_v<TEvents::TResolveOneReq, TReq>) {
        return ERequestTypes::ResolveOne;
    } else if constexpr (std::is_same_v<TEvents::TResolveManyReq, TReq>) {
        return ERequestTypes::ResolveMany;
    } else if constexpr (std::is_same_v<TEvents::TMetricNamesReq, TReq>) {
        return ERequestTypes::MetricNames;
    } else if constexpr (std::is_same_v<TEvents::TLabelValuesReq, TReq>) {
        return ERequestTypes::LabelValues;
    } else if constexpr (std::is_same_v<TEvents::TLabelNamesReq, TReq>) {
        return ERequestTypes::LabelNames;
    } else if constexpr (std::is_same_v<TEvents::TUniqueLabelsReq, TReq>) {
        return ERequestTypes::UniqueLabels;
    } else {
        static_assert(TDependentFalse<TReq>, "unsupported req type");
    }
}

/**
 * Runtime-to-compile-time dispatch: calls fn.operator()<TResp>() where TResp is
 * the response event type matching the given request type.
 *
 * @param eventType  underlying value of ERequestTypes
 * @param fn         callable with a templated call operator taking no arguments
 * @return whatever fn returns
 *
 * Aborts on a value that is not a valid ERequestTypes instead of running off
 * the end of the function.
 */
template <typename TFunc>
auto WithRespType(ui32 eventType, TFunc fn) {
    switch (static_cast<ERequestTypes>(eventType)) {
        case ERequestTypes::Find:
            return fn.template operator()<TMetabaseEvents::TFindResp>();
        case ERequestTypes::ResolveOne:
            return fn.template operator()<TMetabaseEvents::TResolveOneResp>();
        case ERequestTypes::ResolveMany:
            return fn.template operator()<TMetabaseEvents::TResolveManyResp>();
        case ERequestTypes::MetricNames:
            return fn.template operator()<TMetabaseEvents::TMetricNamesResp>();
        case ERequestTypes::LabelValues:
            return fn.template operator()<TMetabaseEvents::TLabelValuesResp>();
        case ERequestTypes::LabelNames:
            return fn.template operator()<TMetabaseEvents::TLabelNamesResp>();
        case ERequestTypes::UniqueLabels:
            return fn.template operator()<TMetabaseEvents::TUniqueLabelsResp>();
    }

    // No default label above, so the compiler keeps warning about unhandled
    // enumerators; a value outside of the enum ends up here.
    Y_FAIL("unknown request type");
}

template <typename TRequest, typename TResponse>
class TRpcMethod {
public:
    using TReq = TRequest;
    using TResp = TResponse;
    using TMetabaseRespOrError = TErrorOr<std::unique_ptr<const TResp>, TMetabaseError>;
    using TMethod = TMetabaseAsyncResponse<TResponse> (IMetabaseRpc::*)(const TRequest&);

    constexpr TRpcMethod(TMethod impl) noexcept
        : Impl_(impl)
    {
    }

    TMetabaseAsyncResponse<TResponse> Call(IMetabaseRpc* rpc, const TRequest& req) {
        return (rpc->*Impl_)(req);
    }

private:
    TMethod Impl_;
};

/**
 * Maps a request event type to the IMetabaseRpc method that serves it.
 * Only the specializations below exist; the primary template is intentionally
 * left undefined. The specializations follow the order of ERequestTypes.
 */
template <typename T>
struct TReqToRpcMethod;

// Find
template <>
struct TReqToRpcMethod<TMetabaseEvents::TFindReq> {
    static constexpr auto RpcMethod = TRpcMethod{&IMetabaseRpc::Find};
};

// ResolveOne
template <>
struct TReqToRpcMethod<TMetabaseEvents::TResolveOneReq> {
    static constexpr auto RpcMethod = TRpcMethod{&IMetabaseRpc::ResolveOne};
};

// ResolveMany
template <>
struct TReqToRpcMethod<TMetabaseEvents::TResolveManyReq> {
    static constexpr auto RpcMethod = TRpcMethod{&IMetabaseRpc::ResolveMany};
};

// MetricNames
template <>
struct TReqToRpcMethod<TMetabaseEvents::TMetricNamesReq> {
    static constexpr auto RpcMethod = TRpcMethod{&IMetabaseRpc::MetricNames};
};

// LabelValues
template <>
struct TReqToRpcMethod<TMetabaseEvents::TLabelValuesReq> {
    static constexpr auto RpcMethod = TRpcMethod{&IMetabaseRpc::LabelValues};
};

// LabelNames
template <>
struct TReqToRpcMethod<TMetabaseEvents::TLabelNamesReq> {
    static constexpr auto RpcMethod = TRpcMethod{&IMetabaseRpc::LabelNames};
};

// UniqueLabels
template <>
struct TReqToRpcMethod<TMetabaseEvents::TUniqueLabelsReq> {
    static constexpr auto RpcMethod = TRpcMethod{&IMetabaseRpc::UniqueLabels};
};

/**
 * Tells whether an operation should decrease the inflight counter.
 */
enum class EInflightDecreasePolicy {
    DoNotChange = 0,
    Decrease = 1,
};

/**
 * Actor serving the requests addressed to one Metabase shard.
 *
 * Identical requests (same message hash) share one request context: the first
 * one triggers a cache lookup and, on a miss or a stale hit, a single request
 * to the metabase; every sender is remembered as a subscriber and gets a copy
 * of the result. The number of simultaneous metabase requests is limited by
 * maxInflight, the rest are postponed. Retryable errors are retried with a
 * growing delay through the scheduler actor.
 */
class TMetabaseShardActor: public TActorBootstrapped<TMetabaseShardActor>, TPrivateEvents {
    enum {
        RequestRetry = SpaceBegin,
        End,
    };

    static_assert(End < SpaceEnd, "too many event types");

    /** Sent back by the scheduler when it is time to retry a request. */
    struct TRequestRetry: TEventLocal<TRequestRetry, RequestRetry> {
        // Valid only while Id is known to the requests manager
        TRequestsManager::TReqCtx* ReqCtx;
        ui64 Id;
    };

public:
    TMetabaseShardActor(
            TString project,
            IMetabaseClusterRpcPtr rpc,
            const TShardLocation& shard,
            size_t maxInflight,
            std::shared_ptr<TMetrics> metrics,
            TActorId scheduler,
            TActorId cache)
        : Project_{std::move(project)}
        , Rpc_{std::move(rpc)}
        , ShardLocation_{shard}
        , MaxInflight_{maxInflight}
        , SchedulerId_{scheduler}
        , CacheId_{cache}
        , Metrics_{std::move(metrics)}
        , RequestsManager_{Metrics_.get()}
    {
    }

    void Bootstrap() {
        NHttp::RegisterMetricSupplier(Metrics_);
        Become(&TThis::Normal);
    }

    STATEFN(Normal) {
        switch (ev->GetTypeRewrite()) {
            /**
             * requests received from TMetabaseCluster
             */
            hFunc(TMetabaseEvents::TFindReq, OnRequest<TMetabaseEvents::TFindReq>);
            hFunc(TMetabaseEvents::TResolveOneReq, OnRequest<TMetabaseEvents::TResolveOneReq>);
            hFunc(TMetabaseEvents::TResolveManyReq, OnRequest<TMetabaseEvents::TResolveManyReq>);
            hFunc(TMetabaseEvents::TMetricNamesReq, OnRequest<TMetabaseEvents::TMetricNamesReq>);
            hFunc(TMetabaseEvents::TLabelNamesReq, OnRequest<TMetabaseEvents::TLabelNamesReq>);
            hFunc(TMetabaseEvents::TLabelValuesReq, OnRequest<TMetabaseEvents::TLabelValuesReq>);
            hFunc(TMetabaseEvents::TUniqueLabelsReq, OnRequest<TMetabaseEvents::TUniqueLabelsReq>);

            /**
             * responses received from Metabase
             */
            hFunc(TMetabaseEvents::TFindResp, OnResponse<TMetabaseEvents::TFindResp>);
            hFunc(TMetabaseEvents::TResolveOneResp, OnResponse<TMetabaseEvents::TResolveOneResp>);
            hFunc(TMetabaseEvents::TResolveManyResp, OnResponse<TMetabaseEvents::TResolveManyResp>);
            hFunc(TMetabaseEvents::TMetricNamesResp, OnResponse<TMetabaseEvents::TMetricNamesResp>);
            hFunc(TMetabaseEvents::TLabelNamesResp, OnResponse<TMetabaseEvents::TLabelNamesResp>);
            hFunc(TMetabaseEvents::TLabelValuesResp, OnResponse<TMetabaseEvents::TLabelValuesResp>);
            hFunc(TMetabaseEvents::TUniqueLabelsResp, OnResponse<TMetabaseEvents::TUniqueLabelsResp>);
            hFunc(TMetabaseEvents::TError, OnError);

            hFunc(TCacheEvents::TLookupResult, OnCacheResult);
            hFunc(TMetabaseShardActorEvents::TShardUpdate, OnShardUpdate);
            hFunc(TMetabaseShardActorEvents::TCancelRequest, OnCancelRequest);
            hFunc(TRequestRetry, OnRequestRetry);
            hFunc(TEvents::TEvPoison, OnPoison);
        }
    }

private:
    /**
     * A request from a client: registers the sender as a subscriber of the
     * request context and starts a cache lookup unless one is already pending.
     * The first request of a context is kept as the original request and is the
     * one sent to the metabase later on.
     */
    template <typename TReq>
    void OnRequest(typename TReq::TPtr& evPtr) {
        auto& ev = *evPtr->Get();
        auto reqId = Hash(*ev.Message);
        Y_VERIFY_DEBUG(!reqId.empty(), "unable to compute a request hash");

        MON_TRACE(MetabaseClient, SelfId() << " [" << HexEncode(reqId) << "] Request " << ev.Message->ShortDebugString()
            << " from " << evPtr->Sender << " cookie " << evPtr->Cookie);

        auto& reqCtx = *RequestsManager_.GetOrCreateReqCtx(reqId, TActivationContext::Now());

        // Note: the Find merger is intentionally not touched here. It is created
        // when the first page is requested from the metabase, so that a duplicate
        // request arriving in the middle of a pagination cannot reset it.

        RequestsManager_.AddSubscriber(reqCtx,
                evPtr->Sender, evPtr->Cookie, evPtr->Get()->Deadline, std::move(evPtr->TraceId));

        if (reqCtx.Fresh()) {
            reqCtx.OriginalRequest = std::unique_ptr<IEventHandle>(evPtr.Release());
            reqCtx.MessageType = ToUnderlying(MessageTypeFor<TReq>());
        }

        if (reqCtx.CacheReqState == ECacheReqState::Absent) {
            auto lookupEv = std::make_unique<TCacheEvents::TLookup>();
            lookupEv->Project = Project_;
            lookupEv->MessageType = reqCtx.MessageType;
            lookupEv->MessageHash = reqId;

            Send(CacheId_, lookupEv.release());
            // a cache lookup counts for the context only, not for the metabase inflight limit
            reqCtx.InFlight++;
            reqCtx.CacheReqState = ECacheReqState::Inflight;
        }
    }

    /**
     * Result of a cache lookup. A hit is sent to the subscribers right away;
     * a miss or a hit that needs a refresh starts a metabase request unless one
     * is already running or postponed.
     */
    void OnCacheResult(const TCacheEvents::TLookupResult::TPtr& evPtr) {
        auto& ev = *(evPtr->Get());
        auto& reqId = ev.MessageHash;
        // the shared pointer keeps the context alive even if it is erased below
        auto reqCtxPtr = RequestsManager_.GetReqCtx(reqId);
        auto& reqCtx = *reqCtxPtr;

        Y_VERIFY(reqCtx.InFlight > 0);
        reqCtx.InFlight--;
        reqCtx.CacheReqState = ECacheReqState::Absent;

        if (ev.Data) { // cache hit
            MON_TRACE(MetabaseClient, SelfId() << " [" << HexEncode(reqId) << "] CacheHit, NeedsRefresh=" << ev.NeedsRefresh);

            WithRespType(reqCtx.MessageType, [&]<typename TResp>() {
                SendResponseToSubscribers(reqCtx, ev.Data, CreateResponseEvent<TResp>);
            });
            if (!ev.NeedsRefresh) {
                if (reqCtx.InFlight == 0 && reqCtx.MetabaseReqState == EMetabaseReqState::New) {
                    // No metabase request running, we can safely drop ctx now
                    RequestsManager_.EraseReqCtx(reqCtx);
                } // Metabase request in progress, ctx must be preserved
                return;
            }
        } // else -- cache miss or need refresh

        if (reqCtx.MetabaseReqState != EMetabaseReqState::New) { // Already in flight or postponed
            return;
        }

        reqCtx.MetabaseReqState = EMetabaseReqState::Inflight;
        TryToSendRequestToMetabase(reqCtxPtr.get());
    }

    /**
     * The scheduler says it is time to retry. Retries of requests that were
     * erased in the meantime are ignored.
     */
    void OnRequestRetry(const TRequestRetry::TPtr& evPtr) {
        auto eventId = evPtr->Get()->Id;
        auto* reqCtxPtr = evPtr->Get()->ReqCtx;

        if (!RequestsManager_.IsKnownRetry(eventId)) {
            // the request was completed or canceled, nothing to process
            return;
        }

        // NOTE(review): after this reset the manager has no way to forget eventId,
        // so the id stays in its set of scheduled retries. Ids are unique, hence it
        // is harmless for the logic, but the set grows with every fired retry.
        reqCtxPtr->RetryId = 0;
        RequestsManager_.DeleteSubscribersWhoTimedOut(*reqCtxPtr, TActivationContext::Now());

        if (RequestsManager_.GetSubscribers(*reqCtxPtr).empty()) {
            // nobody waits for the result anymore
            RequestsManager_.EraseReqCtx(*reqCtxPtr);
            return;
        }

        TryToSendRequestToMetabase(reqCtxPtr);
    }

    /** Sends the request to the metabase or postpones it if the inflight limit is reached. */
    void TryToSendRequestToMetabase(TRequestsManager::TReqCtx* reqCtxPtr) {
        auto& reqCtx = *reqCtxPtr;

        if (RequestsManager_.GetTotalInflightCount() >= MaxInflight_) { // max inflight is reached
            MON_TRACE(MetabaseClient, SelfId() << " [" << HexEncode(reqCtx.Id) << "] Postponed");
            reqCtx.MetabaseReqState = EMetabaseReqState::Postponed;

            RequestsManager_.AddPostponedRequest(reqCtxPtr);
            return;
        }

        SendRequestToMetabase(reqCtxPtr);
    }

    /** Restores the static type of the original request and sends it to the metabase. */
    void SendRequestToMetabase(TRequestsManager::TReqCtx* reqCtx) {
        WithRespType(reqCtx->MessageType, [&]<typename TResp>() {
            using TReq = typename TMetabaseEvents::TResponseToRequest<TResp>::TRequest;
            auto* origEv = static_cast<typename TReq::THandle*>(reqCtx->OriginalRequest.get());

            SendRequestToMetabase<TReq>(reqCtx, origEv);
        });
    }

    /**
     * Converts a completed rpc future into an event for this actor: a response
     * event for a successful answer with the OK status, an error event otherwise
     * (including an exception stored in the future).
     *
     * Static on purpose: it is called from rpc callbacks which may run outside
     * of the actor, so it must not depend on the actor object.
     */
    template <typename TResp, typename TReq = typename TMetabaseEvents::TResponseToRequest<TResp>::TRequest>
    static std::unique_ptr<IEventBase> RpcResponseToEvent(TMetabaseAsyncResponse<typename TResp::TProtoMsg> rpcResp) {
        typename TMetabaseAsyncResponse<typename TResp::TProtoMsg>::value_type respOrError;

        try {
            respOrError = rpcResp.ExtractValue();
        } catch (...) {
            return CreateAnErrorEvent(
                    grpc::StatusCode::UNKNOWN,
                    EMetabaseStatusCode::INTERNAL_ERROR,
                    CurrentExceptionMessage());
        }

        if (respOrError.Fail()) {
            auto err = respOrError.ExtractError();
            return CreateAnErrorEvent(err.RpcCode, err.MetabaseCode, err.Message);
        }

        if (respOrError.Value()->status() == EMetabaseStatusCode::OK) {
            auto respEv = std::make_unique<typename TMetabaseEvents::TRequestToResponse<TReq>::TResponse>();
            respEv->Message = respOrError.Extract();

            return respEv;
        }

        // the call itself succeeded, but the metabase reported an error
        const auto& value = *(respOrError.Value());
        return CreateAnErrorEvent(grpc::StatusCode::OK, value.status(), value.statusmessage());
    }

    /**
     * Sends the original request to the metabase node of the shard. The result
     * comes back to this actor as a response or an error event whose cookie is
     * the address of the request context.
     */
    template <typename TReq, typename TResp = typename TMetabaseEvents::TRequestToResponse<TReq>::TResponse>
    void SendRequestToMetabase(TRequestsManager::TReqCtx* reqCtxPtr, typename TReq::THandle* evPtr) {
        auto& reqCtx = *reqCtxPtr;
        reqCtx.MetabaseReqState = EMetabaseReqState::Inflight;

        if constexpr (std::is_same_v<typename TReq::TProtoMsg, FindRequest>) {
            // The request is (re)started from the first page, so the merger must
            // start from scratch as well. Doing it here covers retries too:
            // pages collected before a failure would be duplicated otherwise.
            reqCtx.FindMerger.emplace(*evPtr->Get()->Message);
        }

        MON_TRACE(MetabaseClient, SelfId() << " [" << HexEncode(reqCtx.Id) << "] "
                << "Sending a request to the metabase(" << ShardLocation_.Address << "): "
                << evPtr->Get()->Message->ShortDebugString());

        auto rpcMethod = TReqToRpcMethod<TReq>::RpcMethod;
        auto node = Rpc_->Get(ShardLocation_.Address);
        auto future = rpcMethod.Call(node, *(evPtr->Get()->Message));

        RequestsManager_.IncInflight(reqCtx);

        auto* actorSystem = TActorContext::ActorSystem();
        auto selfId = SelfId();

        // only values are captured: the callback must not touch the actor object
        future.Subscribe([actorSystem, selfId, reqCtxPtr](auto f) {
            auto reqCtxAsACookie = reinterpret_cast<ui64>(reqCtxPtr);

            actorSystem->Send(new IEventHandle(
                    selfId,
                    selfId,
                    TMetabaseShardActor::RpcResponseToEvent<TResp>(f).release(),
                    0,
                    reqCtxAsACookie));
        });
    }

    /** Asks the cache actor to store the response under the id of the request. */
    template <typename TResp, typename TReq = typename TMetabaseEvents::TResponseToRequest<TResp>::TRequest>
    void SaveCache(const TRequestsManager::TReqCtx& reqCtx, std::shared_ptr<const google::protobuf::Message> msg) {
        auto storeEv = std::make_unique<TCacheEvents::TStore>();
        storeEv->Project = Project_;
        storeEv->MessageType = reqCtx.MessageType;
        storeEv->MessageHash = reqCtx.Id;
        storeEv->Data = std::move(msg);

        MON_TRACE(MetabaseClient, SelfId() << " [" << HexEncode(reqCtx.Id) << "] Store in cache");

        Send(CacheId_, storeEv.release());
    }

    /**
     * Sends an event made by createResponse(respMsg) to every subscriber of the
     * context and removes the subscribers afterwards.
     */
    template <typename TResponseFactory>
    void SendResponseToSubscribers(
            TRequestsManager::TReqCtx& reqCtx,
            const std::shared_ptr<const typename ::google::protobuf::Message>& respMsg,
            TResponseFactory createResponse)
    {
        for (const auto& subscriberInfo: RequestsManager_.GetSubscribers(reqCtx)) {
            MON_TRACE(MetabaseClient, SelfId() << " [" << HexEncode(reqCtx.Id) << "] Replying to " << subscriberInfo.Id);
            Send(subscriberInfo.Id, createResponse(respMsg).release(), 0, subscriberInfo.Cookie, TSpanId(subscriberInfo.Span));
        }

        RequestsManager_.DeleteSubscribers(reqCtx);
    }

    /**
     * Sends a copy of the error to every subscriber of the context. Subscribers
     * are removed only if the error is not retryable.
     */
    template<typename TErrorFactory>
    void SendErrorToSubscribers(
        TRequestsManager::TReqCtx& reqCtx,
        const TMetabaseEvents::TError& error,
        TErrorFactory createError)
    {
        for (const auto& subscriberInfo: RequestsManager_.GetSubscribers(reqCtx)) {
            MON_TRACE(MetabaseClient,
                      SelfId() << " [" << HexEncode(reqCtx.Id) << "] Replying with an error to " << subscriberInfo.Id);
            auto errEv = createError(error.RpcCode, error.MetabaseCode, error.Message);
            Send(subscriberInfo.Id, errEv.release(), 0, subscriberInfo.Cookie, TSpanId(subscriberInfo.Span));
        }

        if (!IsRetryableError(error.MetabaseCode)) {
            RequestsManager_.DeleteSubscribers(reqCtx);
        }
    }

    /** Sends the oldest postponed request (if any); must be called below the inflight limit. */
    void ProcessPostponedRequests() {
        Y_VERIFY(RequestsManager_.GetTotalInflightCount() < MaxInflight_);

        auto* reqCtxPtr = RequestsManager_.ExtractPostponedRequestToProcess();
        if (!reqCtxPtr) { // no reqs to process
            return;
        }

        // the id is a binary hash, print it the same way as everywhere else
        MON_TRACE(MetabaseClient, SelfId() << " [" << HexEncode(reqCtxPtr->Id) << "] Processing postponed request");
        SendRequestToMetabase(reqCtxPtr);
    }

    /**
     * Final successful response from the metabase: answers the subscribers that
     * are still waiting, stores the response in the cache and releases the
     * inflight slot.
     */
    template <typename TResp>
    void HandleResponse(
            TRequestsManager::TReqCtx& reqCtx,
            const std::shared_ptr<const typename ::google::protobuf::Message>& respMsg)
    {
        MON_TRACE(MetabaseClient, SelfId() << " [" << HexEncode(reqCtx.Id) << "] Response");

        RequestsManager_.DeleteSubscribersWhoTimedOut(reqCtx, TActivationContext::Now());
        SendResponseToSubscribers(reqCtx, respMsg, CreateResponseEvent<TResp>);
        SaveCache<TResp>(reqCtx, respMsg);

        Y_VERIFY(reqCtx.InFlight > 0);
        RequestsManager_.DecInflight(reqCtx);

        if (reqCtx.InFlight == 0) {
            RequestsManager_.EraseReqCtx(reqCtx);
            // reqCtx must not be used after this point
        } else {
            // A cache lookup is still pending, so the context stays. The metabase
            // request is over though: without this reset a later cache miss would
            // be taken for "already in flight" and never be served.
            reqCtx.MetabaseReqState = EMetabaseReqState::New;
        }

        ProcessPostponedRequests();
    }

    /**
     * Error from the metabase: releases the inflight slot, reports the error to
     * the subscribers and either schedules a retry (retryable errors) or
     * finishes the request.
     */
    void HandleError(TRequestsManager::TReqCtx& reqCtx, const TMetabaseEvents::TError& error) {
        MON_TRACE(MetabaseClient, SelfId() << " [" << HexEncode(reqCtx.Id) << "] Error");

        Y_VERIFY(reqCtx.InFlight > 0);
        RequestsManager_.DecInflight(reqCtx);
        RequestsManager_.DeleteSubscribersWhoTimedOut(reqCtx, TActivationContext::Now());
        SendErrorToSubscribers(reqCtx, error, CreateAnErrorEvent);

        if (IsRetryableError(error.MetabaseCode)) {
            // the state stays Inflight while the retry is pending
            ScheduleARetry(reqCtx, error.MetabaseCode, error.Message);
        } else if (reqCtx.InFlight == 0) {
            RequestsManager_.EraseReqCtx(reqCtx);
            // reqCtx must not be used after this point
        } else {
            // see the same branch in HandleResponse()
            reqCtx.MetabaseReqState = EMetabaseReqState::New;
        }

        ProcessPostponedRequests();
    }

    /** Response from the metabase; the cookie carries the address of the request context. */
    template <typename TResp>
    void OnResponse(typename TResp::TPtr& ev) {
        auto& reqCtx = *reinterpret_cast<TRequestsManager::TReqCtx*>(ev->Cookie);
        auto respMsg = std::move(ev->Get()->Message);
        HandleResponse<TResp>(reqCtx, respMsg);
    }

    /**
     * One page of a Find response. Pages are merged until the whole result is
     * collected; a result bigger than the limit of the original request is
     * reported as an INVALID_REQUEST error.
     */
    template <>
    void OnResponse<TMetabaseEvents::TFindResp>(TMetabaseEvents::TFindResp::TPtr& ev) {
        auto& message = ev->Get()->Message;
        auto& reqCtx = *reinterpret_cast<TRequestsManager::TReqCtx*>(ev->Cookie);

        ui32 limit = reqCtx.OriginalRequest->Get<TMetabaseEvents::TFindReq>()->Message->GetSliceOptions().GetLimit();
        if (message->GetTotalCount() > limit) {
            TMetabaseEvents::TError error;
            error.RpcCode = grpc::StatusCode::OK;
            error.MetabaseCode = yandex::solomon::metabase::EMetabaseStatusCode::INVALID_REQUEST;
            error.Message = TStringBuilder{} << "total metric count exceeds the limit: " << message->GetTotalCount()
                                             << " > " << limit;
            HandleError(reqCtx, error);
            return;
        }

        auto& merger = *reqCtx.FindMerger;
        merger.AddResponse(*message);
        if (merger.IsReady()) {
            auto respMsg = std::make_shared<FindResponse>(std::move(merger).MergedResponse());
            HandleResponse<TMetabaseEvents::TFindResp>(reqCtx, respMsg);
            return;
        }

        // More pages to fetch. The request keeps the inflight slot it already has.
        auto rpcMethod = TReqToRpcMethod<TMetabaseEvents::TFindReq>::RpcMethod;
        auto node = Rpc_->Get(ShardLocation_.Address);
        auto future = rpcMethod.Call(node, merger.NextRequest());

        // only values are captured: the callback must not touch the actor object
        future.Subscribe([actorSystem = TActorContext::ActorSystem(), selfId = SelfId(), cookie = ev->Cookie](auto f) {
            actorSystem->Send(new IEventHandle(
                    selfId,
                    selfId,
                    TMetabaseShardActor::RpcResponseToEvent<TMetabaseEvents::TFindResp>(f).release(),
                    0,
                    cookie));
        });
    }

    /**
     * Asks the scheduler to send TRequestRetry for the context after the current
     * backoff delay, then advances the backoff for the next attempt.
     *
     * @return always true
     */
    bool ScheduleARetry(TRequestsManager::TReqCtx& reqCtx, EMetabaseStatusCode statusCode, const TString& message) {
        // the delay that is logged is the one that is really used
        const TDuration delay = reqCtx.RetryState.NextDelay();

        MON_WARN(MetabaseClient, "got " << EMetabaseStatusCode_Name(statusCode)
                << " from Metabase (" << message << ")"
                << ", will retry after " << delay);

        auto retryId = ++RetryCnt_;
        auto retryEv = std::make_unique<TRequestRetry>();
        retryEv->Id = retryId;
        retryEv->ReqCtx = &reqCtx;

        reqCtx.RetryId = retryId;
        RequestsManager_.AddRetryId(retryId);

        Send(SchedulerId_, new TSchedulerEvents::TScheduleAfter{retryId, delay, std::move(retryEv)});

        reqCtx.RetryState.Retry();
        return true;
    }

    /** Error from the metabase; the cookie carries the address of the request context. */
    void OnError(TMetabaseEvents::TError::TPtr& evPtr) {
        auto& reqCtx = *reinterpret_cast<TRequestsManager::TReqCtx*>(evPtr->Cookie);
        const auto& ev = *evPtr->Get();
        HandleError(reqCtx, ev);
    }

    /** The sender does not need the result anymore. */
    void OnCancelRequest(const TMetabaseShardActorEvents::TCancelRequest::TPtr& evPtr) {
        RequestsManager_.DeleteSubscriber(evPtr->Sender);
    }

    /** The shard has a new address; the shard id must stay the same. */
    void OnShardUpdate(const TMetabaseShardActorEvents::TShardUpdate::TPtr& ev) {
        auto shardInfo = ev->Get()->ShardInfo;

        Y_VERIFY(ShardLocation_.Id == shardInfo->Id);

        ShardLocation_ = TShardLocation{shardInfo->Id, shardInfo->Address};
    }

    void OnPoison(TEvents::TEvPoison::TPtr& ev) {
        Send(ev->Sender, new TEvents::TEvPoisonTaken);
        PassAway();
    }

private:
    TString Project_;
    IMetabaseClusterRpcPtr Rpc_;
    TShardLocation ShardLocation_;
    size_t MaxInflight_;
    TActorId SchedulerId_;
    TActorId CacheId_;
    // must be declared before RequestsManager_, which keeps a raw pointer to it
    std::shared_ptr<TMetrics> Metrics_;
    TRequestsManager RequestsManager_;
    ui64 RetryCnt_{0};
};

/**
 * Creates shard actors which share the replica name and the scheduler actor.
 */
class TShardActorFactory final: public IShardActorFactory {
public:
    TShardActorFactory(TString replica, TActorId schedulerId)
        : Replica_{std::move(replica)}
        , SchedulerId_{schedulerId}
    {
    }

private:
    /**
     * Creates an actor for the given shard together with a metrics object
     * labelled with the replica and the project.
     */
    std::unique_ptr<NActors::IActor> Create(
            TString project,
            IMetabaseClusterRpcPtr rpc,
            const TShardLocation& shard,
            size_t maxInflight,
            NActors::TActorId cache) override
    {
        // built in a separate statement, i.e. strictly before project is handed over to the actor
        auto shardMetrics = std::make_shared<TMetrics>(Replica_, project);

        return std::make_unique<TMetabaseShardActor>(
                std::move(project),
                std::move(rpc),
                shard,
                maxInflight,
                std::move(shardMetrics),
                SchedulerId_,
                cache);
    }

private:
    TString Replica_;
    std::shared_ptr<NMonitoring::IMetricRegistry> Metrics_;
    TActorId SchedulerId_;
};

} // namespace

/**
 * Creates the factory of metabase shard actors.
 *
 * @param replica      replica name used as a label of the actors' metrics
 * @param schedulerId  scheduler actor used by the shard actors for retries
 */
std::shared_ptr<IShardActorFactory> CreateShardActorFactory(TString replica, NActors::TActorId schedulerId) {
    auto factory = std::make_shared<TShardActorFactory>(std::move(replica), schedulerId);
    return factory;
}

} // namespace NSolomon::NDataProxy
