#include "module.h"

#include "backends_factory.h"
#include "entities.h"
#include "rate_limiter.h"

#include <balancer/modules/service_discovery/sd.h>
#include <balancer/modules/report/lib/uuid_parser.h>

#include <balancer/kernel/coro/coro_async.h>
#include <balancer/kernel/custom_io/double.h>
#include <balancer/kernel/custom_io/rewind.h>
#include <balancer/kernel/helpers/errors.h>
#include <balancer/kernel/http/parser/http.h>
#include <balancer/kernel/http/parser/status_codes/status_codes.h>
#include <balancer/kernel/log/errorlog.h>
#include <balancer/kernel/module/module.h>
#include <balancer/kernel/process/thread_info.h>
#include <balancer/kernel/process/config_check.h>

#include <library/cpp/json/json_writer.h>

#include <util/digest/murmur.h>
#include <util/generic/scope.h>
#include <util/string/cast.h>
#include <util/string/strip.h>
#include <util/thread/singleton.h>
#include <util/random/shuffle.h>

using namespace NSrvKernel;
using namespace NModBalancer;

// Half-open range [MIN_HTTP_CODE, MAX_HTTP_CODE) of status codes accepted
// as keys of the "on_status_code" config section.
constexpr ui16 MIN_HTTP_CODE = 100;
constexpr ui16 MAX_HTTP_CODE = 600;

namespace NModBalancer {

    TAttempts::TAttempts(size_t value)
        : OriginalValue_(value)
        , Value_(value)
    {
    }

    // Applies a textual override of the attempts count.
    // The text is stripped and parsed as size_t: a positive number becomes the
    // effective value; anything else (unparsable text or zero) drops the
    // override and restores the originally configured value.
    void TAttempts::Override(TStringBuf v) {
        const TStringBuf trimmed(StripString(v));
        size_t parsed;
        const bool accepted = TryFromString<size_t>(trimmed, parsed) && parsed > 0;

        IsOverriden_ = accepted;
        if (accepted) {
            Value_ = parsed;
            return;
        }
        Value_ = OriginalValue_;
    }

    // Effective number of attempts.
    // When configured to follow the backends count and no override is active,
    // `backendsCount` is returned; otherwise the stored value is used.
    size_t TAttempts::Get(size_t backendsCount) const {
        const bool followBackendsCount = UseBackendsCount_ && !IsOverriden_;
        return followBackendsCount ? backendsCount : Value_;
    }

    // Parses the config value: the literal "count_backends" switches to
    // "use the backends count" mode, any other text is converted with
    // FromString<size_t> and becomes both the original and the current value.
    void TAttempts::ParseConfig(TStringBuf v) {
        UseBackendsCount_ = (v == "count_backends");
        if (UseBackendsCount_) {
            return;
        }

        OriginalValue_ = FromString<size_t>(v);
        Value_ = OriginalValue_;
    }

    namespace {
        // Parsed contents of the "check_backends" config section.
        struct TCheckBackends {
            // Quorum / hysteresis / section name settings filled by the parser.
            TBackendCheckParameters Parameters;
            // Set by the "skip" key; when true the quorum and name requirements
            // are not enforced and the backend group is not registered.
            bool Skip{false};
        };
    }

    // Rewinds the client input so that it can be replayed for the next attempt.
    // If the input reports it is no longer rewindable, the Y_REQUIRE check fails
    // with THttpError{503} "too big request to retry".
    // NOTE(review): Y_REQUIRE presumably returns the error from this function
    // rather than throwing (the function is noexcept) — confirm against the macro.
    // Returns an empty TError on success.
    inline TError RewindClientInput(TLimitedRewindableInput& clientInput) noexcept {
        Y_REQUIRE(clientInput.IsRewindable(),
            THttpError{503} << "too big request to retry");
        clientInput.Rewind();
        return {};
    }

    // Owns a copy of the connection descriptor and, when hashing is enabled,
    // re-mixes the descriptor's Hash on every Update() call so that successive
    // attempts carry different hash values.
    class THashUpdater {
    public:
        THashUpdater(const TConnDescr& descr, bool isHashing) noexcept
            : Descr_(descr.Copy())
            , IsHashing_(isHashing)
        {
        }

        // The privately owned descriptor copy.
        const TConnDescr& Descr() const noexcept {
            return Descr_;
        }

        // Advances the attempt counter and rehashes; a no-op when hashing is off.
        void Update() noexcept {
            if (!IsHashing_) {
                return;
            }

            ++Attempt_;
            Descr_.Hash += Attempt_;
            Descr_.Hash = MurmurHash<TRequestHash>(&Descr_.Hash, sizeof(Descr_.Hash));
        }

        // Stores the attempt id in the descriptor copy.
        void UpdateId(ui64 id) noexcept {
            Descr_.Id = id;
        }

    private:
        TConnDescr Descr_;
        TRequestHash Attempt_ = 0;
        bool IsHashing_ = false;
    };

    class TDeferredLogOutput: public TStringStream {
    public:
        TDeferredLogOutput(IOutputStream* slave)
            : Slave_(slave)
        {}

        void Flush() {
            if (Slave_) {
                *Slave_ << Str();
            }
            Clear();
        }

    private:
        IOutputStream* Slave_;
    };
}

// Per-worker state of the balancer module.
Y_TLS(balancer) {
    explicit TTls(const TAttempts& attempts)
        : Attempts(attempts)
    {
    }

    // Re-reads the attempts file data; when its id differs from the one seen
    // last time, remembers the new data and feeds it to Attempts.Override().
    void UpdateAttempts() noexcept {
        const auto& fresh = AttemptsFileReader.Data();
        if (fresh.Id() == AttemptsData.Id()) {
            return;
        }

        AttemptsData = fresh;
        Attempts.Override(AttemptsData.Data());
    }

    TSharedFileReReader AttemptsFileReader;
    TSharedFileReReader::TData AttemptsData;
    TAttempts Attempts{1};
    // Incremented when a backend fails after part of the response was already sent.
    size_t Incomplete = 0;

    THolder<NBalancerSD::TWorkerBackendsRef> WorkerBackendsRef;
};

MODULE_WITH_TLS(balancer) {
public:
    TModule(const TModuleParams& mp)
        : TModuleBase(mp)
        , Entities_("balancer2: ")
        , AttemptsRateLimiter_(MakeHolder<TAttemptsRateLimiter>())
    {
        Config->ForEach(this);

        if (BaseAttempts_.GetOriginalValue() <= 0) {
            ythrow TConfigParseError() << "zero attempts in balancer2 are not allowed";
        }

        if (FastAttemptsUsed_ && ConnectionAttemptsUsed_) {
            ythrow TConfigParseError() << "try to use both connection_attempts and fast_attempts";
        }

        if (ConnectionAttemptsUsed_ && Fast503_) {
            ythrow TConfigParseError() << "try to use fast_503 with connection_attempts";
        }

        if(ReturnLast5xx_ && ReturnLastBlacklisted_) {
            ythrow TConfigParseError() << "try to use both return_last_5xx and return_last_blacklisted_http_code";
        }

        if (ReturnLast5xx_ && !StatusCodeReactions_.IsClassHasBadMembers(5)) {
            ythrow TConfigParseError() << "try to return 5xx, but 5xx hasn't members in blacklist";
        }

        if (CheckBackends_) {
            Entities_.ApplyConfiguration(*this, CheckBackends_->Parameters);
        } else {
            Entities_.ApplyConfiguration(*this);
        }

        if (RegisterEvents_) {
            RegisterEvent("dump_backends", "balancer2.DumpBackends", &TModule::DumpBackends, this);
            RegisterEvent("dump_balancing_state", "balancer2.DumpBalancingState", &TModule::DumpBalancingState, this);
            RegisterEvent("dump_weights_file_tags", "balancer2.DumpWeightsFileTags", &TModule::DumpWeightsFileTags, this);
        }

        AttemptsRateLimiter_->Verify(HedgedDelay_);

        TopLevelBalancing_ = !CheckParents([&](TStringBuf name) { return "balancer" == name; });
    }

private:
    // Config key handler of the module.
    // NOTE(review): `key` and `value` are presumably introduced by START_PARSE and
    // this body is presumably invoked once per config key via Config->ForEach(this)
    // in the constructor — confirm against the macro definitions.
    // Every recognised key is handled and followed by `return`; anything that is
    // not recognised falls through to the last block and is passed to Entities_.
    START_PARSE {
        PARSE_EVENTS;

        // "attempts": either "count_backends" or a number (see TAttempts::ParseConfig).
        if (key == "attempts") {
            BaseAttempts_.ParseConfig(value->AsString());
            return;
        }


        // "connection_attempts" and "fast_attempts" both fill FastAttempts_;
        // the flags let the constructor reject configs that use both.
        if (key == "connection_attempts") {
            FastAttempts_.ParseConfig(value->AsString());
            ConnectionAttemptsUsed_ = true;
            return;
        }

        if (key == "fast_attempts") {
            FastAttempts_.ParseConfig(value->AsString());
            FastAttemptsUsed_ = true;
            return;
        }

        ON_KEY("fast_503", Fast503_) {
            return;
        }

        // Status codes listed here are registered with the "Bad" reaction.
        if (key == "status_code_blacklist") {
            TStatusCodeReactionParser parser(StatusCodeReactions_, TStatusCodeReaction::Bad());
            value->AsSubConfig()->ForEach(&parser);
            return;
        }

        // Status codes listed here are registered with the "Good" reaction.
        if (key == "status_code_blacklist_exceptions") {
            TStatusCodeReactionParser parser(StatusCodeReactions_, TStatusCodeReaction::Good());
            value->AsSubConfig()->ForEach(&parser);
            return;
        }

        // return_last_5xx and return_last_blacklisted_http_code are mutually
        // exclusive; the constructor enforces that.
        ON_KEY("return_last_5xx", ReturnLast5xx_) {
            return;
        }

        ON_KEY("return_last_blacklisted_http_code", ReturnLastBlacklisted_) {
            return;
        }

        ON_KEY("hedged_delay", HedgedDelay_) {
            return;
        }

        ON_KEY("hedged_cancel", HedgedCancel_) {
            return;
        }

        // Delay settings used between failed attempts in MakeAttempts.
        ON_KEY("first_delay", InitialDelay_) {
            return;
        }

        ON_KEY("delay_multiplier", DelayMultiplier_) {
            return;
        }

        ON_KEY("max_random_delay", MaxRandomDelay_) {
            return;
        }

        ON_KEY("delay_on_fast", DelayOnFastAttempts_) {
            return;
        }

        // File re-read once per second per worker (see DoInitTls) to override attempts.
        ON_KEY("attempts_file", AttemptsFileName_) {
            return;
        }

        // Sub-modules loaded from nested config sections.
        if (key == "on_error") {
            TSubLoader(Copy(value->AsSubConfig())).Swap(OnError_);
            return;
        }

        if (key == "on_fast_error") {
            TSubLoader(Copy(value->AsSubConfig())).Swap(OnFastError_);
            return;
        }

        // Map "status code -> sub-module"; codes outside [100, 600) are rejected.
        if (key == "on_status_code") {
            NSrvKernel::ParseMap(value->AsSubConfig(), [this](const auto &key, auto *val) {
                ui16 status = FromString<ui16>(key);
                if (status < MIN_HTTP_CODE || status >= MAX_HTTP_CODE) {
                    ythrow TConfigParseError() << "on_status_code should have status codes in range 100-599";
                }
                TSubLoader(Copy(val->AsSubConfig())).Swap(OnStatusCode_[status]);
            });
            return;
        }

        ON_KEY("rewind_limit", LimitRewindBytes_) {
            return;
        }

        // Parsed into a local: only an explicit `false` has an effect, adding
        // POST and PATCH to the set of methods that must not be retried.
        bool retryNonIdempotent = true;
        ON_KEY("retry_non_idempotent", retryNonIdempotent) {
            if (!retryNonIdempotent) {
                NotRetryableMethods_ |= EMethod::POST;
                NotRetryableMethods_ |= EMethod::PATCH;
            }
            return;
        }

        ON_KEY("register_events", RegisterEvents_) {
            return;
        }

        // Comma-separated list of method names (upper-cased before parsing);
        // any parsing failure is rethrown as TConfigParseError.
        if (key == "not_retryable_methods") try {
            for (const auto& method : StringSplitter(value->AsString()).Split(',')) {
                NotRetryableMethods_ |= FromString<EMethod>(to_upper(ToString(method.Token())));
            }
            return;
        } catch (...) {
            ythrow TConfigParseError() << "error parsing " << key.Quote() << ": " << CurrentExceptionMessage();
        }

        ON_KEY("allow_retry_header", AllowRetryHeader_) {
            return;
        }

        // The rate limiter parses its own sub-section.
        if (key == "attempts_rate_limiter") {
            value->AsSubConfig()->ForEach(AttemptsRateLimiter_.Get());
            return;
        }

        // "check_backends" may appear only once. Unless "skip" is set, at least
        // one of quorum / amount_quorum and a section name are mandatory.
        if (key == "check_backends") {
            Y_ENSURE_EX(!CheckBackends_, TConfigParseError() << "check_backends redefined");
            CheckBackends_.ConstructInPlace();
            // The callback returns true for keys it recognised and false otherwise.
            ParseMapStrict(value->AsSubConfig(), [this](auto& key, auto* value) {
                ON_KEY("quorum", CheckBackends_->Parameters.Quorum) {
                    return true;
                }
                ON_KEY("amount_quorum", CheckBackends_->Parameters.AmountQuorum) {
                    return true;
                }
                ON_KEY("hysteresis", CheckBackends_->Parameters.Hysteresis) {
                    Y_ENSURE_EX(*CheckBackends_->Parameters.Hysteresis >= 0,
                                TConfigParseError{} << "check_backends hysteresis can't be negative");
                    return true;
                }
                ON_KEY("amount_hysteresis", CheckBackends_->Parameters.AmountHysteresis) {
                    return true;
                }
                ON_KEY("name", CheckBackends_->Parameters.SectionName) {
                    return true;
                }
                ON_KEY("skip", CheckBackends_->Skip) {
                    return true;
                }
                return false;
            });

            if (!CheckBackends_->Skip) {
                Y_ENSURE_EX(CheckBackends_->Parameters.Quorum || CheckBackends_->Parameters.AmountQuorum,
                            TConfigParseError() << "neither quorum, nor amount_quorum is set in check_backends, but it is required to set at least one of them");
                Y_ENSURE_EX(CheckBackends_->Parameters.SectionName, TConfigParseError() << "check_backends name is required");
            }
            return;
        }

        ON_KEY("_apphost_hedged_after_last_chunk", HedgedAfterLastChunk_) {
            return;
        }

        ON_KEY("use_on_error_for_non_idempotent", UseOnErrorForNonIdempotent_) {
            return;
        }

        // Fallback: any other key is handed to Entities_ together with its sub-config.
        {
            const TModuleParams moduleParams = Copy(value->AsSubConfig());
            Entities_.Configure(key, moduleParams);
            return;
        }
    } END_PARSE

private:
    template <class Consumer>
    void ProcessCurrentBackends(Consumer&& f) {
        auto* process = FastTlsSingleton<NSrvKernel::NProcessCore::TThreadInfo>()->WorkerProcess;
        auto& tls = GetTls(process);

        if (tls.WorkerBackendsRef) {
            NBalancerSD::TBackendsRef holder(process->Executor().Running(), tls.WorkerBackendsRef->GetWorkerBackendsHolder());
            f(*holder.Backends());
        } else if (Entities_.Backends()) {
            f(*Entities_.Backends());
        }
    }

    // Event handler: writes the current backends dump into the event output.
    void DumpBackends(TEventData& event) noexcept {
        auto* worker = FastTlsSingleton<NSrvKernel::NProcessCore::TThreadInfo>()->WorkerProcess;
        const auto dump = [&event, worker](IBackends& backends) {
            backends.DumpBackends(event.Out(), *worker);
        };
        ProcessCurrentBackends(dump);
    }

    // Event handler: writes the current balancing state into the event output.
    void DumpBalancingState(TEventData& event) noexcept {
        auto* worker = FastTlsSingleton<NSrvKernel::NProcessCore::TThreadInfo>()->WorkerProcess;
        const auto dump = [&event, worker](IBackends& backends) {
            backends.DumpBalancingState(event.Out(), *worker);
        };
        ProcessCurrentBackends(dump);
    }

    // Event handler: writes the weights file tags into the event output.
    void DumpWeightsFileTags(TEventData& event) noexcept {
        auto* worker = FastTlsSingleton<NSrvKernel::NProcessCore::TThreadInfo>()->WorkerProcess;
        const auto dump = [&event, worker](IBackends& backends) {
            backends.DumpWeightsFileTags(event.Out(), *worker);
        };
        ProcessCurrentBackends(dump);
    }

private:
    // Creates the per-worker state: seeds it with the configured attempts,
    // attaches the attempts file re-reader (1 second interval) when a file name
    // is configured, initialises entities and the rate limiter, and registers
    // the backend group unless check_backends is absent or skipped.
    THolder<TTls> DoInitTls(IWorkerCtl* process) override {
        THolder<TTls> workerTls = MakeHolder<TTls>(BaseAttempts_);

        const bool hasAttemptsFile = !!AttemptsFileName_;
        if (hasAttemptsFile) {
            workerTls->AttemptsFileReader = process->SharedFiles()->FileReReader(AttemptsFileName_, TDuration::Seconds(1));
        }

        Entities_.Init(process, workerTls->WorkerBackendsRef);
        AttemptsRateLimiter_->Init();

        const bool registerGroup = CheckBackends_ && !CheckBackends_->Skip;
        if (registerGroup) {
            process->RegisterBackendGroup(CheckBackends_->Parameters.SectionName, this);
        }

        return workerTls;
    }

    void DoDispose(IWorkerCtl* process, TTls& tls) override {
        Entities_.Dispose(process, tls.WorkerBackendsRef);
    }

    class TAttemptsState : TNonCopyable {
    private:
        IAttemptsHolder* AttemptsHolderBase_ = nullptr;
        size_t AttemptsPassedToBackend_ = 0;
        size_t AttemptsInProgress_ = 0;
        size_t AttemptsChecked_ = 0;
        bool AttemptsLimited_ = false;
        bool FastError_ = true;
        bool MyBackendError_ = true;
        bool SentSomething_ = false;
        bool UpgradeCompleted_ = false;
        bool HedgedSucceeded_ = false;

        THashUpdater HashUpdater_;
        const TConnDescr& Descr_;
        THolder<IPolicy> Policy_;
        THolder<IAlgorithm> Algorithm_;
        TContSimpleEvent HedgedEvent_;
        IBackend* CheckedNextBackend_ = nullptr;

    public:
        TAttemptsState(IAttemptsHolder& attemptsHolderBase, const TConnDescr& descr,
                       THolder<IPolicy> policy, THolder<IAlgorithm> algorithm, bool isHashing)
            : AttemptsHolderBase_(&attemptsHolderBase)
            , HashUpdater_(descr, isHashing)
            , Descr_(descr)
            , Policy_(std::move(policy))
            , Algorithm_(std::move(algorithm))
            , HedgedEvent_(&descr.Process().Executor())
        {}

        IBackend* NextBackend() {
            if (CheckedNextBackend_) {
                return std::exchange(CheckedNextBackend_, nullptr);
            } else {
                return Policy_->Next(Algorithm_.Get(), FastError_);
            }
        }

        bool CheckNextBackend(bool fastError) {
            if (CheckedNextBackend_) {
                return true;
            } else {
                CheckedNextBackend_ = Policy_->Next(Algorithm_.Get(), fastError);
                return CheckedNextBackend_ != nullptr;
            }
        }

        [[nodiscard]]
        const THashUpdater& HashUpdater() const {
            return HashUpdater_;
        }

        [[nodiscard]]
        const TConnDescr& Descr() {
            return Descr_;
        }

        [[nodiscard]]
        const TConnDescr& NextDescr() {
            auto id = AttemptsHolderBase_->RegisterAttempt();
            HashUpdater_.Update();
            HashUpdater_.UpdateId(id);
            return HashUpdater_.Descr();
        }

        void UnregisterAttempt() const {
            AttemptsHolderBase_->UnregisterAttempt();
        }

        void RegisterSuccess() {
            --AttemptsInProgress_;
            ++AttemptsPassedToBackend_;
            Policy_->RegisterSuccess();
        }

        bool RegisterFail(const TError& error, bool fast503, bool hedgedRequest, ui64 id) {
            const bool fastError = AttemptsHolderBase_->RegisterFail(error, fast503, hedgedRequest, id);
            FastError_ &= fastError;
            if (!fastError) {
                ++AttemptsPassedToBackend_;
                Policy_->RegisterFail();
            }

            return fastError;
        }

        void OnRetryHeader() {
            Policy_->RegisterFail();
            Policy_->MarkAsRetry();
            FastError_ = false;
            AttemptsPassedToBackend_ = 1;
        }

        [[nodiscard]]
        size_t AttemptNumber() const {
            return AttemptsHolderBase_->AttemptsMax() - AttemptsHolderBase_->AttemptsAvailable();
        }

        [[nodiscard]]
        size_t AttemptsMax() const {
            return AttemptsHolderBase_->AttemptsMax();
        }

        [[nodiscard]]
        size_t AttemptsAvailable() const {
            return AttemptsHolderBase_->AttemptsAvailable();
        }

        [[nodiscard]]
        size_t FastAttemptsAvailable() const {
            return AttemptsHolderBase_->FastAttemptsAvailable();
        }

        [[nodiscard]]
        size_t AttemptsPassedToBackend() const {
            return AttemptsPassedToBackend_;
        }

        [[nodiscard]]
        size_t AttemptsInProgress() const {
            return AttemptsInProgress_;
        }

        void IncAttemptsInProgress() {
            ++AttemptsInProgress_;
        }

        void DecAttemptsInProgress() {
            --AttemptsInProgress_;
        }

        void ResetAttempts() {
            AttemptsHolderBase_->ResetAttempts();
        }

        [[nodiscard]]
        bool FastError() const {
            return FastError_;
        }

        [[nodiscard]]
        const bool& SentSomething() const {
            return SentSomething_;
        }

        void MarkSentSomething() {
            SentSomething_ = true;
            HedgedEvent_.BroadCast();
        }

        [[nodiscard]]
        bool UpgradeCompleted() const {
            return UpgradeCompleted_;
        }

        void MarkUpgradeCompleted() {
            UpgradeCompleted_ = true;
        }

        [[nodiscard]]
        bool MyBackendError() const {
            return MyBackendError_;
        }

        void MarkNotMyBackendError() {
            MyBackendError_ = false;
        }

        void OnAttempt(ui64 id, bool hedged) {
            if (hedged) {
                ++Descr().Properties->ConnStats.BackendHedgedAttempts;
            }
            AttemptsHolderBase_->NotifyAttempt(id, hedged);
        }

        void OnHedgedAttemptSuccess() {
            ++Descr().Properties->ConnStats.BackendHedgedSucc;
            AttemptsHolderBase_->NotifyHedgedRequestSuccess();
            HedgedSucceeded_ = true;
        }

        IAttemptsHolder& AttemptsHolder() {
            return *AttemptsHolderBase_;
        }

        bool HedgedSucceeded() const {
            return HedgedSucceeded_;
        }

        TContSimpleEvent& HedgedEvent() {
            return HedgedEvent_;
        }

        void IncAttemptsLimitChecked() noexcept {
            ++AttemptsChecked_;
        }

        bool AttemptsLimitChecked() noexcept {
            if (AttemptsChecked_ > 0) {
                --AttemptsChecked_;
                return true;
            }
            return false;
        }

        bool AttemptsLimited() const noexcept {
            return AttemptsLimited_;
        }

        void SetAttemptsLimited() noexcept {
            AttemptsLimited_ = true;
        }
    };

    // Chooses which request (and body prefix) an attempt should carry, based on
    // whether the attempt is the first one, a retry or a hedged one, and builds
    // the connection descriptor for that attempt.
    class TInputWithCustomBody {
    public:
        // attemptsHolder   source of the first / retry / hedged request variants
        // isHedgedRequest  true when this object serves the hedged task
        TInputWithCustomBody(IAttemptsHolder& attemptsHolder, bool isHedgedRequest)
            : IsHedgedRequest_(isHedgedRequest)
            , AttemptsHolder_(attemptsHolder)
        {}

        // Returns a copy of `descr` bound to `input` / `clientOutput`, with
        // RequestType set to the current state and, when the holder supplied a
        // custom request, with Request pointing at it.
        // The request variant is (re)loaded when:
        //   - non-hedged and descr.Id == 0                      -> First
        //   - hedged and the state is not Hedged yet            -> Hedged
        //   - non-hedged, descr.Id > 0, state is not Retry yet  -> Retry
        TConnDescr PrepareConnDescr(const TConnDescr& descr, TLimitedRewindableInput& input, IHttpOutput& clientOutput) {
            auto id = descr.Id;
            if (!IsHedgedRequest_ && id == 0) {
                OnFirst(input);
            } else if (IsHedgedRequest_ && State_ != ERequestType::Hedged) {
                OnHedged(input);
            } else if (!IsHedgedRequest_ && id > 0 && State_ != ERequestType::Retry) {
                OnRetry(input);
            }

            auto newDescr = descr.Copy(input, clientOutput);
            newDescr.RequestType = State_;
            if (RequestWithBody_.Request.Get()) {
                newDescr.Request = RequestWithBody_.Request.Get();
            }

            return newDescr;
        }

    private:
        void OnFirst(TLimitedRewindableInput& input) {
            State_ = ERequestType::First;
            Initialize(input);
        }

        void OnHedged(TLimitedRewindableInput& input) {
            State_ = ERequestType::Hedged;
            Initialize(input);
        }

        void OnRetry(TLimitedRewindableInput& input) {
            State_ = ERequestType::Retry;
            Initialize(input);
        }

        // Fetches the request variant matching `type` from the attempts holder.
        TRequestWithBody GetRequestWithBody(ERequestType type) {
            switch (type) {
                case ERequestType::First:
                    return AttemptsHolder_.GetFirstRequest();
                case ERequestType::Retry:
                    return AttemptsHolder_.GetRetryRequest();
                case ERequestType::Hedged:
                    return AttemptsHolder_.GetHedgedRequest();
            }
            // Falling off the end of a value-returning function is undefined
            // behaviour; for a value outside the handled enumerators return an
            // empty result, which Initialize() treats as "no custom request".
            return TRequestWithBody{};
        }

        // Loads the request variant for the current state; when the holder
        // supplied one, its body becomes the prefix of `input`.
        void Initialize(TLimitedRewindableInput& input) {
            RequestWithBody_ = GetRequestWithBody(State_);
            if (!RequestWithBody_.Request.Get()) {
                return;
            }
            input.SetPrefix(TChunkList(std::move(RequestWithBody_.Body)));
        }

    private:
        const bool IsHedgedRequest_ = false;
        ERequestType State_ = ERequestType::First;
        IAttemptsHolder& AttemptsHolder_;
        TRequestWithBody RequestWithBody_;
    };

    // Runs the attempt loop for one task: the regular one, or the hedged one
    // when `hedgedRequest` is true.
    //
    // state          shared per-request bookkeeping (counters, policy, flags)
    // rateLimiter    consulted before every attempt except the very first one
    // input          client input, rewound before each attempt
    // clientOutput   output handed to the backend module (dereferenced unconditionally)
    // tls            per-worker state; only its Incomplete counter is touched here
    // hedgedRequest  marks attempts made by the hedged task
    // deferredLog    when set for a hedged task, the attempt's extra access log is redirected into it
    //
    // Returns an empty TError on success, when the hedged task has already
    // succeeded, or when the running coroutine was cancelled; otherwise
    // TAttemptLimitedError, TNoValidBackendsError, TAttemptsOverError, the rewind
    // error, or the backend error that stopped the retries.
    TError MakeAttempts(TAttemptsState& state, TAttemptsRateLimiter& rateLimiter, TLimitedRewindableInput& input, IHttpOutput* clientOutput, TTls& tls, bool hedgedRequest = false, TDeferredLogOutput* deferredLog = nullptr) const {
        // Delay applied after a failed attempt; multiplied by DelayMultiplier_ after each sleep.
        TDuration currentDelay = InitialDelay_;

        // Optional random jitter before the first attempt.
        if (MaxRandomDelay_ != TDuration::Zero()) {
            state.Descr().Process().Executor().Running()->SleepT(MaxRandomDelay_ * RandomNumber<double>());
        }

        // On exit, tell the holder when a hedged task never got to make an attempt.
        bool hedgedAttemptAllowed = false;
        Y_DEFER {
            if (hedgedRequest && !hedgedAttemptAllowed) {
                state.AttemptsHolder().NotifyHedgedRequestUnallowed();
            }
        };

        TInputWithCustomBody inputWithCustomBody(state.AttemptsHolder(), hedgedRequest);

        while (state.AttemptsAvailable() && !state.Descr().Process().Executor().Running()->Cancelled()) {
            if (hedgedRequest && !state.AttemptsHolder().IsHedgedAttemptAllowed()) {
                break;
            }

            // NextDescr() registers the attempt with the holder and updates hash/id.
            auto nextDescr = inputWithCustomBody.PrepareConnDescr(state.NextDescr(), input, *clientOutput);
            if (hedgedRequest && deferredLog) {
                nextDescr.ExtraAccessLog = TAccessLogOutput(deferredLog, nextDescr.ExtraAccessLog.Summary());
            }

            LOG_ERROR(TLOG_INFO, state.Descr(), "balancer2 \"" << Entities_.BackendsType() << "\" start attempt " << state.AttemptNumber());

            if (TError error = RewindClientInput(input)) {
                nextDescr.ExtraAccessLog.SetSummary(GetHandle()->Name(), "rewind client input error");
                return error;
            }

            // Skip the rate limiter when a limit check was already recorded
            // via IncAttemptsLimitChecked(); the call consumes that record.
            if (!state.AttemptsLimitChecked()) {
                // first request passes without restrictions
                if (size_t passedAndInProgress = state.AttemptsPassedToBackend() + state.AttemptsInProgress()) {
                    bool allowed = rateLimiter.RetryAllowed(passedAndInProgress - 1) && state.AttemptsHolder().RetryAllowed();
                    state.AttemptsHolder().OnReask(hedgedRequest, allowed);
                    if (!allowed) {
                        return Y_MAKE_ERROR(TAttemptLimitedError{state.AttemptNumber()});
                    }
                }
            }

            IBackend* const backend = state.NextBackend();
            if (!backend) {
                // Nothing to try: undo the registration made by NextDescr().
                state.UnregisterAttempt();
                return Y_MAKE_ERROR(TNoValidBackendsError{});
            }

            state.IncAttemptsInProgress();

            const TInstant start = TInstant::Now();

            // NOTE(review): `return` inside the Y_TRY body apparently does not leave
            // MakeAttempts (hence this flag, checked after the block), while `return`
            // inside Y_CATCH does — confirm against the macro definitions.
            bool willReturn = false;
            Y_TRY(TError, error) {
                if (hedgedRequest && !hedgedAttemptAllowed) {
                    hedgedAttemptAllowed = true;
                }

                state.OnAttempt(nextDescr.Id, hedgedRequest);

                Y_PROPAGATE_ERROR(backend->Module()->Run(nextDescr));

                if (nextDescr.AttemptsHolder && nextDescr.AttemptsHolder->ShouldValidateBody()) {
                    state.MarkNotMyBackendError();
                }

                if (hedgedRequest) {
                    state.OnHedgedAttemptSuccess();
                }

                // Also decrements the in-progress counter.
                state.RegisterSuccess();

                backend->OnCompleteRequest(TInstant::Now() - start);

                LOG_ERROR(TLOG_INFO, nextDescr, "balancer2 \"" << Entities_.BackendsType() << "\" succeeded attempt " << state.AttemptNumber());

                willReturn = true;

                return TError{};
            } Y_CATCH {
                bool needDelay = false;

                // The failed attempt is no longer in progress, whichever way this block is left.
                Y_DEFER {
                    state.DecAttemptsInProgress();
                };

                // The hedged task already succeeded: report success for the regular task.
                // NOTE(review): RegisterSuccess() decrements AttemptsInProgress_ and the
                // Y_DEFER above decrements it again on return — looks like a double
                // decrement of an unsigned counter; confirm this is intended.
                if (!hedgedRequest && state.HedgedSucceeded()) {
                    state.RegisterSuccess();
                    return {};
                }

                if (auto* e = error.GetAs<TBackendError>()) {
                    // The flag is cleared after an attempt whose holder validates the body;
                    // in that case backend errors are returned as is, without retries.
                    if (!state.MyBackendError()) {
                        return error;
                    }
                    LOG_ERROR(TLOG_ERR, nextDescr, "balancer2 \"" << Entities_.BackendsType() << "\" failed attempt " << state.AttemptNumber());

                    // Cancellation and on_status_code errors end the loop immediately.
                    if (const auto* ee = e->InnerError().GetAs<TSystemError>()) {
                        if (ee->Status() == ECANCELED) {
                            return error;
                        }
                    } else if (const auto* ee = e->InnerError().GetAs<TOnStatusCodeError>()) {
                        TError ret;
                        e->StoreTo(ret);
                        return ret;
                    }

                    const bool currentErrorIsFast = state.RegisterFail(error, Fast503_, hedgedRequest, nextDescr.Id);

                    backend->OnFailRequest(error, TInstant::Now() - start);

                    TRequest* const request = nextDescr.Request;
                    if (!request) {
                        // No parsed request (tcp proxy mode per the log message):
                        // only fast errors may be retried.
                        if (!currentErrorIsFast) {
                            state.ResetAttempts();

                            LOG_ERROR(TLOG_DEBUG, nextDescr, "backend error: Retry in tcp proxy mode is not permitted");

                            TError ret;
                            e->StoreTo(ret);
                            return ret;
                        }
                    } else if (NotRetryableMethods_.HasFlags(request->RequestLine().Method)
                               && request->Props().TransferedWholeRequest && !state.SentSomething()) {
                        // A not-retryable method whose request was fully transferred:
                        // no more attempts. With UseOnErrorForNonIdempotent_ the loop
                        // continues and ends through the exhausted attempts instead of
                        // returning the backend error here.
                        state.ResetAttempts();

                        LOG_ERROR(TLOG_ERR, nextDescr, "backend error: Retry of not retryable requests is not permitted");

                        if (!UseOnErrorForNonIdempotent_) {
                            TError ret;
                            e->StoreTo(ret);
                            return ret;
                        }
                    }

                    // Part of the response has already been sent: a retry is impossible.
                    if (state.SentSomething()) {
                        if (!state.UpgradeCompleted()) {
                            ++tls.Incomplete;

                            LOG_ERROR(TLOG_ERR, nextDescr, "backend error: Incomplete output");
                        } else {
                            state.ResetAttempts();

                            LOG_ERROR(TLOG_ERR, nextDescr, "backend error: Upgraded protocol stream closed");
                        }

                        TError ret;
                        e->StoreTo(ret);
                        return ret;
                    }

                    // Fast errors are delayed only when DelayOnFastAttempts_ is set.
                    if (DelayOnFastAttempts_ || !currentErrorIsFast) {
                        needDelay = true;
                    }

                    if (currentErrorIsFast) {
                        rateLimiter.RegisterFastAttempt();
                    }
                } else if (const auto* e = error.GetAs<TNetworkResolutionError>()) {
                    // Resolution errors are always retried (subject to the loop condition).
                    LOG_ERROR(TLOG_ERR, nextDescr, "balancer2 \"" << Entities_.BackendsType() << "\" failed attempt "
                        << state.AttemptNumber() << ' ' << e->what());

                    const bool currentErrorIsFast = state.RegisterFail(error, Fast503_, hedgedRequest, nextDescr.Id);

                    backend->OnFailRequest(error, TInstant::Now() - start);

                    if (DelayOnFastAttempts_ || !currentErrorIsFast) {
                        needDelay = true;
                    }

                    if (currentErrorIsFast) {
                        rateLimiter.RegisterFastAttempt();
                    }
                } else {
                    // Any other error type is not retried.
                    LOG_ERROR(TLOG_ERR, nextDescr, "balancer2 error: " << GetErrorMessage(error));
                    return error;
                }

                if (currentDelay == TDuration::Zero()) {
                    needDelay = false;
                }

                // Sleep only when another attempt is still possible.
                if (needDelay && state.AttemptsAvailable()) {
                    state.Descr().Process().Executor().Running()->SleepT(currentDelay);
                    currentDelay *= DelayMultiplier_;
                }
            }

            if (willReturn) {
                return TError{};
            }
        }

        // Loop finished without success: cancellation is not reported as an error.
        if (state.Descr().Process().Executor().Running()->Cancelled()) {
            return {};
        } else {
            return Y_MAKE_ERROR(TAttemptsOverError{});
        }
    }

    // Runs the regular attempt loop in one coroutine and, if it has not signalled
    // the hedged event before the soft deadline, starts a second (hedged) loop in
    // another coroutine.
    //
    // inputs        inputs[0] feeds the regular task, inputs[1] the hedged one
    // clientOutput  the real client output; both tasks write through tracking wrappers
    //
    // Returns an empty TError if either task succeeded. Otherwise returns the
    // regular task's error, or the hedged task's error when the regular one only
    // ran out of attempts (TAttemptsOverError).
    TError MakeHedgedAttempts(TAttemptsState& state, TAttemptsRateLimiter& rateLimiter, TDeque<TLimitedRewindableInput>& inputs, IHttpOutput& clientOutput, TTls& tls) const {
        Y_ASSERT(!inputs.empty());

        // Index (0 = regular, 1 = hedged) of the task whose output was touched.
        TMaybe<size_t> whoTouched;

        std::array<IHttpOutput*, 2> overridenOutputs{
            &clientOutput,
            &clientOutput
        };
        // With body validation the holder supplies the outputs; their callbacks
        // record which task started sending.
        if (state.AttemptsHolder().ShouldValidateBody()) {
            overridenOutputs[0] = state.AttemptsHolder().HttpOutput([&] {
                state.MarkSentSomething();
                whoTouched = 0;
            }, &clientOutput, false);
            overridenOutputs[1] = state.AttemptsHolder().HttpOutput([&] {
                state.MarkSentSomething();
                whoTouched = 1;
            }, &clientOutput, true);
        }

        std::array outputs{
            TTouchTrackingHttpOutput{*overridenOutputs[0], state.SentSomething(), 0, whoTouched},
            TTouchTrackingHttpOutput{*overridenOutputs[1], state.SentSomething(), 1, whoTouched}
        };

        std::array<TError, 2> errors;
        std::array<TCoroutine, 2> tasks;

        // The regular task starts right away; when it finishes it cancels the
        // hedged request in the holder and wakes the waiters of the hedged event.
        tasks[0] = TCoroutine{
            "hedged first task",
            &state.Descr().Process().Executor(),
            [&] {
                errors[0] = MakeAttempts(state, rateLimiter, inputs[0], &outputs[0], tls);
                state.AttemptsHolder().CancelHedged();
                state.HedgedEvent().BroadCast();
            }
        };

        // Optionally wait until the holder reports readiness for hedging; if it
        // reports false, only the regular task's result is used.
        if (HedgedAfterLastChunk_) {
            if (!state.AttemptsHolder().WaitReadyForHedged(state.Descr().Process().Executor().Running())) {
                tasks[0].Join();
                return std::move(errors[0]);
            }
        }

        TInstant hedgedSoftDeadline = GetHedgedSoftDeadline(state);

        // A timeout means the event was not signalled before the deadline:
        // consider starting the hedged task if attempts remain.
        if (state.HedgedEvent().WaitD(hedgedSoftDeadline) == ETIMEDOUT && state.AttemptsAvailable()) {
            if (!state.AttemptsHolder().IsHedgedAttemptAllowed()) {
                tasks[0].Join();
                return std::move(errors[0]);
            }

            if (!state.AttemptsHolder().ShouldSendHedged()) {
                state.AttemptsHolder().NotifyHedgedRequestUnallowed();
                tasks[0].Join();
                return std::move(errors[0]);
            }

            // Buffers the hedged task's extra access log until both tasks are done.
            // NOTE(review): the hedged coroutine captures this local by reference and
            // it is declared after `tasks`, so it is destroyed first if an exception
            // (e.g. from Y_ENSURE_EX below) leaves this scope before the joins — verify
            // TCoroutine teardown cannot touch it afterwards.
            TDeferredLogOutput deferredLog(state.Descr().ExtraAccessLog.Slave());

            tasks[1] = TCoroutine{
                "hedged second task",
                &state.Descr().Process().Executor(),
                [&] {
                    errors[1] = MakeAttempts(state, rateLimiter, inputs[1], &outputs[1], tls, /*hedgedRequest =*/ true, /*deferredLog =*/ &deferredLog);

                    state.HedgedEvent().BroadCast();
                }
            };

            // Wait until one of the tasks signals (finished or started sending).
            state.HedgedEvent().WaitI();

            // Once one task has started sending, optionally cancel the other one.
            if (HedgedCancel_ && state.SentSomething()) {
                Y_ENSURE_EX(whoTouched.Get(), yexception{} << "unknown request succeeded");
                tasks[1 - *whoTouched].Cancel();
            }
            for (auto& task : tasks) {
                task.Join();
            }
            deferredLog.Flush();
            // Success of either task is success of the whole request.
            for (size_t index = 0; index < 2; ++index) {
                if (!errors[index]) {
                    return {};
                }
            }
            // Prefer the more specific error over a plain "attempts are over".
            if (errors[0].GetAs<TAttemptsOverError>()) {
                return std::move(errors[1]);
            } else {
                return std::move(errors[0]);
            }
        } else {
            tasks[0].Join();
            return std::move(errors[0]);
        }

        // Not reachable: both branches above return.
        return {};
    }

    // Checks the backends this module currently balances over.
    //
    // If the worker-level backends reference reports a pending update error (or a
    // failed last update), the check fails immediately with that error. Otherwise the
    // check is delegated to the backends obtained through the worker holder, falling
    // back to the statically configured Entities_.Backends() when the holder yields none.
    TBackendCheckResult CheckBackends(IWorkerCtl& proc, bool runtimeCheck, TTls& tls) const noexcept override {
        if (tls.WorkerBackendsRef) {
            auto updateError = tls.WorkerBackendsRef->PopLastUpdateError();
            if (updateError) {
                return {std::move(updateError), TBackendCheckResult::EStatus::Failed};
            }
            if (tls.WorkerBackendsRef->LastUpdateFailed()) {
                return {Y_MAKE_ERROR(yexception{} << "last SD update failed"), TBackendCheckResult::EStatus::Failed};
            }
        }

        NBalancerSD::TBackendsRef sdHolder(
            proc.Executor().Running(),
            tls.WorkerBackendsRef ? tls.WorkerBackendsRef->GetWorkerBackendsHolder() : nullptr
        );

        // Evaluate the holder's backends once, then pick them or the static fallback.
        auto fromHolder = sdHolder.Backends();
        return (fromHolder ? fromHolder : Entities_.Backends())->CheckBackends(proc, runtimeCheck);
    }

    // Tells whether DoRun should take the hedged path for this request.
    //
    // Hedging is possible when either
    //   * the attempts holder already carries a hedged soft deadline (anything other
    //     than TInstant::Max()), or
    //   * a non-zero HedgedDelay_ is configured and the request method is not listed
    //     in NotRetryableMethods_.
    //
    // The request pointer is null for non-HTTP traffic (DoRun handles that case as
    // "tcp proxy"), so it must not be dereferenced blindly. Without a parsed request
    // the delay-based trigger is treated as not applicable.
    bool IsHedgedPossible(TAttemptsState& state) const {
        if (state.AttemptsHolder().GetHedgedSoftDeadline() != TInstant::Max()) {
            return true;
        }
        if (HedgedDelay_ == TDuration::Zero()) {
            return false;
        }
        TRequest* const request = state.Descr().Request;
        return request && !NotRetryableMethods_.HasFlags(request->RequestLine().Method);
    }

    // Picks the moment after which a hedged request may be started.
    // A deadline provided by the attempts holder wins; TInstant::Max() there means
    // "not set", in which case the deadline is derived from HedgedDelay_.
    TInstant GetHedgedSoftDeadline(TAttemptsState& state) const {
        const auto holderDeadline = state.AttemptsHolder().GetHedgedSoftDeadline();
        if (holderDeadline == TInstant::Max()) {
            return HedgedDelay_.ToDeadLine();
        }
        return holderDeadline;
    }

    // Main request entry point: selects backends, runs the attempts (optionally hedged)
    // and maps the resulting error to on_status_code / on_error submodules.
    //
    // descr - connection descriptor; descr.Request may be null (handled below as tcp proxy).
    // tls   - per-worker state (attempts settings and the worker backends reference).
    //
    // Returns an empty error on success, the submodule's result when an error handler
    // was run, ECANCELED if the running coroutine was cancelled, or the original error
    // when it is of none of the types handled in the catch block.
    TError DoRun(const TConnDescr& descr, TTls& tls) const override {
        tls.UpdateAttempts();

        // Use this module's own rate limiter unless a non-top-level balancer inherits
        // one from the parent attempts holder.
        auto* rateLimiter = AttemptsRateLimiter_.Get();
        if (!TopLevelBalancing_ && descr.AttemptsHolder && descr.AttemptsHolder->GetRateLimit()) {
            rateLimiter = descr.AttemptsHolder->GetRateLimit();
        }
        rateLimiter->RegisterRequest();

        // A failed backends update is only logged here; the request still proceeds.
        if (tls.WorkerBackendsRef) {
            if (auto err = tls.WorkerBackendsRef->PopLastUpdateError()) {
                LOG_ERROR(TLOG_ERR, descr, "unable to update backends: " << GetErrorMessage(err));
            }
        }

        NBalancerSD::TBackendsRef holder(descr.Process().Executor().Running(), tls.WorkerBackendsRef ? tls.WorkerBackendsRef->GetWorkerBackendsHolder() : nullptr);
        // When the holder stores nullptr, fall back to the statically configured backends;
        // otherwise `backends` stays null and the holder itself is queried below.
        IBackends* backends = holder.IsStored(nullptr) ? Entities_.Backends() : nullptr;

        Y_VERIFY(!holder.IsStored(nullptr) || backends);

        // Own attempts holder is created for top-level balancing or when the caller
        // did not pass one; otherwise the caller's holder is reused.
        THolder<TAttemptsHolderBase> attemptsHolderBase;
        if (TopLevelBalancing_ || !descr.AttemptsHolder) {
            size_t backendsCount = backends ? backends->Size() : holder.BackendsCount();
            attemptsHolderBase = MakeHolder<TAttemptsHolderBase>(tls.Attempts.Get(backendsCount), FastAttempts_.Get(backendsCount));
        }

        IAttemptsHolder& attemptsHolder = attemptsHolderBase ? *attemptsHolderBase : *descr.AttemptsHolder;
        attemptsHolder.ResetDcAttempts();
        TStepParams stepParams(&descr, attemptsHolder.AttemptsAvailable() + attemptsHolder.FastAttemptsAvailable());
        THolder<IPolicy> policy = Entities_.PolicyFactory()->ConstructPolicy(stepParams);
        THolder<IAlgorithm> algorithm = backends ? backends->ConstructAlgorithm(stepParams) : holder.ConstructAlgorithm(stepParams);
        bool isHashing = backends ? backends->IsHashing() : holder.IsHashing();
        TAttemptsState state(attemptsHolder, descr, std::move(policy), std::move(algorithm), isHashing);

        // How many bytes of the client input may be rewound for a retry.
        size_t rewindLimit = LimitRewindBytes_;
        if (!descr.Request) { // tcp proxy
            rewindLimit = 0;
        }
        // No rewinding is needed when there is no on_error submodule, exactly one attempt
        // is available, and no fast attempt can be used (none left or Fast503_ is off).
        // OnError_ itself is checked because the on-error enabled flag may be re-enabled later.
        if (!OnError_ // it is possible to reenable OnErrorEnabled_ flag, so checking for not having on_error
            && state.AttemptsAvailable() == 1
            && (state.FastAttemptsAvailable() == 0 || !Fast503_))
        {
            rewindLimit = 0;
        }

        bool performHedged = IsHedgedPossible(state);

        TDeque<TLimitedRewindableInput> inputs;
        THolder<TDoubleInputMaster> hedgedInputMaster;
        std::array<THolder<TDoubleInputSlave>, 2> hedgedInputSlaves;

        if (performHedged) {
            // Hedged mode: the client input is split through a master into two slaves,
            // one rewindable input per attempt series.
            hedgedInputMaster = MakeHolder<TDoubleInputMaster>(*descr.Input, rewindLimit);

            hedgedInputSlaves[0] = MakeHolder<TDoubleInputSlave>(*hedgedInputMaster, 0);
            hedgedInputSlaves[1] = MakeHolder<TDoubleInputSlave>(*hedgedInputMaster, 1);

            inputs.emplace_back(*hedgedInputSlaves[0], rewindLimit);
            inputs.emplace_back(*hedgedInputSlaves[1], rewindLimit);
        } else {
            inputs.emplace_back(*descr.Input, rewindLimit);
        }

        // Output wrapper placed between backend attempts and the real client output.
        // The head callback decides whether a backend response is forwarded to the
        // client or turned into an error (on_status_code / bad status).
        auto clientOutput = MakeHttpOutput([&](TResponse&& response, const bool forceClose, TInstant deadline) -> TError {
            const auto status = response.ResponseLine().StatusCode;
            // Emulation of nginx error_page. If status code is found
            // then "throw" exception and "catch" it for running submodule
            Y_REQUIRE(OnStatusCode_.find(status) == OnStatusCode_.end(), TBackendError{Y_MAKE_ERROR(TOnStatusCodeError(status))});

            // A bad response may still be returned to the client as the "last" one
            // when configured so (5xx or blacklisted status).
            bool canUseAsReturnLast = (ReturnLast5xx_ && status >= 500) ||
                    (ReturnLastBlacklisted_ && StatusCodeReactions_.IsBad(status));
            bool hasNextBackend = true;
            if (canUseAsReturnLast && state.AttemptsAvailable()) {
                hasNextBackend = state.CheckNextBackend(Fast503_ && status == 503);
            }

            TRequest* request = descr.Request;
            // TODO: maybe add  && request->Props().TransferedWholeRequest to retry partially sent request
            const bool notRetryable = request ? NotRetryableMethods_.HasFlags(request->RequestLine().Method) : false;
            bool canMakeRetry = hasNextBackend && state.AttemptsAvailable() && !notRetryable;
            if (canUseAsReturnLast && canMakeRetry) {
                // Ask the rate limiter whether one more retry is allowed; if not,
                // this response becomes the one returned to the client.
                if (size_t passedAndInProgress = state.AttemptsPassedToBackend() + state.AttemptsInProgress()) {
                    if (!rateLimiter->RetryAllowed(passedAndInProgress - 1)) {
                        canMakeRetry = false;
                        state.SetAttemptsLimited();
                    } else {
                        state.IncAttemptsLimitChecked();
                    }
                }
            }

            // A bad status passes through only as "return last" with no retry possible.
            Y_REQUIRE((canUseAsReturnLast && !canMakeRetry) || !StatusCodeReactions_.IsBad(status),
                TBackendError(Y_MAKE_ERROR(THttpError(status))));

            // When the parent holder validates bodies, "sent something" is marked by the
            // holder-provided output instead (see the callbacks passed to HttpOutput).
            if (!descr.AttemptsHolder || !descr.AttemptsHolder->ShouldValidateBody()) {
                state.MarkNotMyBackendError();
                state.MarkSentSomething();
            }

            if (status == HTTP_SWITCHING_PROTOCOLS) {
                // After a protocol upgrade, rewind buffers are reset on all inputs.
                state.MarkUpgradeCompleted();
                if (hedgedInputMaster) {
                    hedgedInputMaster->ResetRewind();
                }
                for (auto& input : inputs) {
                    input.ResetRewind();
                }
            }
            return descr.Output->SendHead(std::move(response), forceClose, deadline);
        }, [&](TChunkList lst, TInstant deadline) {
            if (!descr.AttemptsHolder || !descr.AttemptsHolder->ShouldValidateBody()) {
                state.MarkSentSomething();
            }
            return descr.Output->Send(std::move(lst), deadline);
        }, [&] (THeaders&& trailers, TInstant deadline) {
            return descr.Output->SendTrailers(std::move(trailers), deadline);
        });

        // The retry header is honoured only when AllowRetryHeader_ is set.
        if (descr.Request && descr.Request->Headers().GetFirstValue(TXYandexRetryFsm::Instance()) && AllowRetryHeader_) {
            state.OnRetryHeader();
        }

        // On exit, report to the rate limiter the number of attempts beyond the first one.
        Y_DEFER {
            if (size_t passedAttempts = state.AttemptsPassedToBackend()) {
                rateLimiter->RegisterAttempts(passedAttempts - 1);
            }
        };

        Y_TRY(TError, error) {
            if (!performHedged) {
                IHttpOutput* overridenClientOutput = &clientOutput;
                if (state.AttemptsHolder().ShouldValidateBody()) {
                    overridenClientOutput = state.AttemptsHolder().HttpOutput([&] {
                        state.MarkSentSomething();
                    }, &clientOutput, false);
                }

                return MakeAttempts(state, *rateLimiter, inputs[0], overridenClientOutput, tls);
            } else {
                return MakeHedgedAttempts(state, *rateLimiter, inputs, clientOutput, tls);
            }
        } Y_CATCH {
            // Map known error types to the configured fallback submodules;
            // inputs[0] is the input that gets rewound and replayed to them.
            if (auto* e = error.GetAs<TOnStatusCodeError>()) {
                return RunOnStatusCode(state.Descr(), inputs[0], e->Code());
            } else if (auto* e = error.GetAs<TAttemptLimitedError>()) {
                state.Descr().Properties->ConnStats.LimitedBackendAttempt++;

                LOG_ERROR(TLOG_ERR, descr, "balancer2 error: attempt "
                    << e->Attempt() << " limited");
                state.Descr().ExtraAccessLog << " limited";

                return RunOnError(state.HashUpdater().Descr(), inputs[0], "attempt limited", state.FastError());
            } else if (auto* e = error.GetAs<TNoValidBackendsError>()) {
                ++descr.Properties->ConnStats.NoBackendsError;

                LOG_ERROR(TLOG_ERR, descr, "balancer2 error: No valid backends found");
                state.Descr().ExtraAccessLog << " no_backends";

                return RunOnError(state.HashUpdater().Descr(), inputs[0], "no valid backends found", state.FastError());
            } else if (auto* e = error.GetAs<TAttemptsOverError>()) {
                LOG_ERROR(TLOG_ERR, descr, "balancer2 error: All attempts (" << state.AttemptsMax() << ") failed");

                // NOTE(review): this branch passes `descr`, while the two branches above
                // pass state.HashUpdater().Descr() — confirm the difference is intended.
                return RunOnError(descr, inputs[0], "all attempts failed", state.FastError());
            } else {
                return error;
            }
        };

        // Attempts finished without error, but the coroutine was cancelled meanwhile.
        if (descr.Process().Executor().Running()->Cancelled()) {
            LOG_ERROR(TLOG_ERR, descr, "balancer2 error: cancelled with " << state.AttemptsAvailable() << " left");
            descr.ExtraAccessLog.SetSummary(GetHandle()->Name(), "cancelled");
            return Y_MAKE_ERROR(TSystemError{ECANCELED});
        }

        // A response was returned because the rate limiter refused a further retry:
        // account for it and prepend this module to the access log summary.
        if (state.AttemptsLimited()) {
            state.Descr().Properties->ConnStats.LimitedBackendAttempt++;
            state.Descr().ExtraAccessLog << " limited";
            TAccessLogSummary *summary = descr.ExtraAccessLog.Summary();
            if (summary) {
                summary->AnsweredModule = GetHandle()->Name() + " | " + summary->AnsweredModule;
                summary->AnswerReason = "attempt limited | " + summary->AnswerReason;
            }
        }

        return {};
    }

    // Always true: DoRun copes with a null descr.Request (its "tcp proxy" case).
    // NOTE(review): exact meaning of this hook is defined by the base module class — confirm there.
    bool DoCanWorkWithoutHTTP() const noexcept override {
        return true;
    }

    // Always true: this module writes its own entries into descr.ExtraAccessLog.
    // NOTE(review): exact meaning of this hook is defined by the base module class — confirm there.
    bool DoExtraAccessLog() const noexcept override {
        return true;
    }

    // Handles a balancing failure by delegating to the on_fast_error / on_error submodule.
    //
    // descr       - connection descriptor passed on to the submodule.
    // clientInput - client input; rewound before the submodule is run.
    // error       - short failure reason, used in the access log summary and in the 503 error.
    // fastError   - selects on_fast_error (when configured) and is stored in the returned TBackendError.
    //
    // Returns a TBackendError wrapping THttpError 503 when no suitable submodule is
    // configured, the rewind error if the input cannot be rewound, otherwise the
    // submodule's result.
    TError RunOnError(const TConnDescr& descr, TLimitedRewindableInput& clientInput, TStringBuf error, bool fastError) const {
        const bool viaFastError = fastError && OnFastError_;
        auto& handler = viaFastError ? OnFastError_ : OnError_;

        if (!handler) {
            descr.ExtraAccessLog.SetSummary(GetHandle()->Name(), ToString(error));
            return Y_MAKE_ERROR(TBackendError(Y_MAKE_ERROR(THttpError{503} << error << " "), fastError));
        }

        TExtraAccessLogEntry handlerLogEntry(descr, viaFastError ? "on_fast_error" : "on_error");

        if (TError rewindFailure = RewindClientInput(clientInput)) {
            TString reason = ToString(error);
            reason += TString(viaFastError ? ", on_fast_error" : ", on_error");
            reason += ": rewind client input error";
            descr.ExtraAccessLog.SetSummary(GetHandle()->Name(), std::move(reason));
            return rewindFailure;
        }

        // After the submodule has run, prepend this module and the reason to the summary.
        Y_DEFER {
            if (TAccessLogSummary* summary = descr.ExtraAccessLog.Summary()) {
                summary->AnsweredModule = GetHandle()->Name() + " | " + summary->AnsweredModule;
                const TString handlerTag(viaFastError ? ", on_fast_error | " : ", on_error | ");
                summary->AnswerReason = ToString(error) + handlerTag + summary->AnswerReason;
            }
        };

        return handler->Run(descr.CopyIn(clientInput));
    }

    // Runs the submodule registered in OnStatusCode_ for the given backend status code.
    //
    // descr       - connection descriptor passed on to the submodule.
    // clientInput - client input; rewound before the submodule is run.
    // code        - backend HTTP status that triggered the handler.
    //
    // Returns the rewind error if the input cannot be rewound, a TBackendError if no
    // submodule is registered for the code, otherwise the submodule's result.
    TError RunOnStatusCode(const TConnDescr& descr, TLimitedRewindableInput& clientInput, ui16 code) const {
        TStringBuilder label;
        label << "on_status_code " << code;

        TExtraAccessLogEntry handlerLogEntry(descr, label);

        if (TError rewindFailure = RewindClientInput(clientInput)) {
            descr.ExtraAccessLog.SetSummary(GetHandle()->Name(), label + ": rewind client input error");
            return rewindFailure;
        }

        const auto handlerIt = OnStatusCode_.find(code);
        if (handlerIt == OnStatusCode_.end()) {
            descr.ExtraAccessLog.SetSummary(GetHandle()->Name(), label + ": no submodule");
            return Y_MAKE_ERROR(TBackendError{Y_MAKE_ERROR(yexception() << "Incorrect submodule for on_status_code " << code)});
        }

        // After the submodule has run, prepend this module and the label to the summary.
        Y_DEFER {
            if (TAccessLogSummary* summary = descr.ExtraAccessLog.Summary()) {
                summary->AnsweredModule = GetHandle()->Name() + " | " + summary->AnsweredModule;
                summary->AnswerReason = label + " | " + summary->AnswerReason;
            }
        };

        return handlerIt->second->Run(descr.CopyIn(clientInput));
    }

private:
    // Source of the statically configured backends and of the policy factory.
    TEntities Entities_;

    // Fallback submodules run by RunOnError; OnFastError_ is preferred for fast errors when set.
    THolder<IModule> OnError_;
    THolder<IModule> OnFastError_;
    // Backend status code -> submodule run by RunOnStatusCode instead of forwarding the response.
    THashMap<ui16, THolder<IModule>> OnStatusCode_;
    TString AttemptsFileName_;
    // Upper bound on buffered client input that can be rewound for a retry.
    size_t LimitRewindBytes_{ 30L << 20 }; // 30Mb limit
    // Fast attempts budget passed to the attempts holder created in DoRun.
    TAttempts FastAttempts_{0};
    TAttempts BaseAttempts_{1};
    TDuration MaxRandomDelay_ = TDuration::Zero();
    TDuration InitialDelay_ = TDuration::Zero();
    // Factor applied to the current delay after each sleep between attempts.
    double DelayMultiplier_ = 1;
    bool DelayOnFastAttempts_ = false;
    // Treat a 503 response as a candidate for a fast attempt.
    bool Fast503_ = false;
    bool FastAttemptsUsed_ = false;
    bool ConnectionAttemptsUsed_ = false;
    // Honour the retry request header (see TXYandexRetryFsm usage in DoRun).
    bool AllowRetryHeader_ = false;
    // Allow returning the last 5xx / blacklisted response to the client when no retry is possible.
    bool ReturnLast5xx_ = false;
    bool ReturnLastBlacklisted_ = false;
    // When set, DoRun always uses its own attempts holder and rate limiter.
    bool TopLevelBalancing_ = false;
    // Default rate limiter for retries.
    THolder<TAttemptsRateLimiter> AttemptsRateLimiter_;
    // Decides which backend status codes count as bad.
    TStatusCodeReactions StatusCodeReactions_;
    // HTTP methods that are neither retried nor hedged by delay.
    EMethods NotRetryableMethods_;
    // Delay before a hedged request; zero disables delay-based hedging.
    TDuration HedgedDelay_ = TDuration::Zero();
    TMaybe<TCheckBackends> CheckBackends_;
    bool RegisterEvents_ = true;
    // Wait for the holder's "ready for hedged" signal before arming the hedged deadline.
    bool HedgedAfterLastChunk_ = false;
    bool UseOnErrorForNonIdempotent_ = false;
    // Cancel the other hedged task once one of them has sent something to the client.
    bool HedgedCancel_ = true;
};

// Exposes this module's handle outside the translation unit by forwarding to TModule::Handle().
IModuleHandle* NModBalancer::Handle() {
    return TModule::Handle();
}
