#include <infra/netmon/topology/settings.h>
#include <infra/netmon/probe_schedule_maintainer.h>
#include <infra/netmon/library/futures.h>
#include <infra/netmon/library/requester.h>
#include <infra/netmon/settings.h>
#include <infra/netmon/metrics.h>

#include <library/cpp/json/json_reader.h>
#include <library/cpp/http/misc/httpcodes.h>

#include <util/datetime/cputimer.h>
#include <util/generic/array_ref.h>
#include <util/generic/is_in.h>

namespace NNetmon {
    // Size of the interested-host set collected during the most recent
    // reschedule pass; written with AtomicSet() from TImpl::Reschedule().
    // Has external linkage, so it is presumably declared and read elsewhere
    // (e.g. for metrics) — confirm against the corresponding header.
    TAtomic InterestedHostsCount;

    namespace {
        // Extra slack added on top of the probe schedule TTL, both when
        // constructing the scheduled task and when deciding whether interested
        // hosts should already be collected.
        const TDuration SPARE_INTERVAL = TDuration::Seconds(10);
        // Minimum number of link poller probes (successes + failures) a host must
        // have accumulated since the previous mute pass before it may be muted.
        // The floating-point product is implicitly truncated to ui64.
        // NOTE(review): this is initialized during static initialization, so it
        // reads TSettings at that moment — confirm the settings are already
        // populated by then. The 0.5 * 10 factors are not explained in this file.
        const ui64 MINIMUM_LINK_POLLER_PROBES_TO_MUTE = 0.5 * 10 * TSettings::Get()->GetProbeRescheduleInterval().Seconds();
        // Only the first this-many muted hosts of a pass are logged by name.
        const ui64 MUTED_HOSTS_COUNT_TO_LOG = 100;

        class TStableHostSet {
        public:
            TStableHostSet()
                : Hosts(MakeAtomicShared<TTopologyStorage::THostSet>())
                , PrevSetSize(0)
            {
            }

            // If proposed set size changes by more than InterestedHostsChangeThreshold
            // between two successive calls, TryReplace fails.
            bool TryReplace(const TTopologyStorage::THostSetRef& newHosts) {
                bool canReplace = IsDeltaUnderThreshold(newHosts->size(), PrevSetSize);
                if (canReplace) {
                    Hosts = newHosts;
                } else {
                    INFO_LOG << "Delta of host count is too large: "
                             << "was " << PrevSetSize << ", now " << newHosts->size() << Endl;
                }
                PrevSetSize = newHosts->size();
                return canReplace;
            }

            const TTopologyStorage::THostSetRef& Get() const {
                return Hosts;
            }

        private:
            inline static bool IsDeltaUnderThreshold(i64 size, i64 pastSize) {
                if (!TSettings::Get()->GetInterestedHostsChangeThreshold()) {
                    return true;
                }
                const auto threshold = TSettings::Get()->GetInterestedHostsChangeThreshold();
                return abs(size - pastSize) <= pastSize * threshold;
            }

            TTopologyStorage::THostSetRef Hosts;
            size_t PrevSetSize;
        };

        // Asks the replicas of one location, in order, for /api/v1/interested_hosts.
        // The returned future carries the first response with HTTP_OK; when every
        // replica fails it carries the outcome of the last one. An empty url list
        // produces a future that holds the "empty url list" exception.
        //
        // `urls` is a non-owning view that is captured by the continuation, so the
        // underlying strings have to stay alive until the returned future is set.
        THttpRequester::TFuture RequestInterestedHostsFromLocation(TArrayRef<const TString> urls) {
            if (urls.empty()) {
                auto promise = NThreading::NewPromise<NHttpFetcher::TResultRef>();
                promise.SetException("empty url list");
                return promise;
            }

            return THttpRequester::Get()->MakeRequest(urls[0] + "/api/v1/interested_hosts").Apply(
                [urls, timer = TSimpleTimer()](const THttpRequester::TFuture& future) {
                    INFO_LOG << "Request to " << urls[0] << "/api/v1/interested_hosts took " << timer.Get() << Endl;

                    const bool succeeded = future.HasValue() && future.GetValue()->Code == HttpCodes::HTTP_OK;
                    const bool lastReplica = urls.size() == 1;
                    if (succeeded || lastReplica) {
                        // nothing more to do: we either have an answer or ran out of replicas
                        return future;
                    }
                    // this replica failed, fall back to the remaining ones
                    return RequestInterestedHostsFromLocation(urls.Slice(1));
                }
            );
        }

        // Extracts host names from the "hosts" array of a JSON response body and
        // resolves each of them through topologyStorage.FindHost(). Names that are
        // not found in the topology are dropped silently. The JSON reader is called
        // with its error flag set and the *Safe accessors are used, so malformed
        // input is expected to surface as an exception.
        TVector<TTopology::THostRef> ParseInterestedHostsResponse(const NHttpFetcher::TResultRef& response,
                                                                  const TTopologyStorage& topologyStorage) {
            const TString& body = response->Data;
            NJson::TJsonValue parsed;
            NJson::ReadJsonFastTree(body, &parsed, true);
            const auto& names = parsed["hosts"].GetArraySafe();

            TVector<TTopology::THostRef> resolved;
            resolved.reserve(names.size());
            for (const auto& entry : names) {
                auto host = topologyStorage.FindHost(entry.GetStringSafe());
                if (!host) {
                    continue;
                }
                resolved.push_back(std::move(host));
            }
            return resolved;
        }

        // Collects interested hosts from every configured location whose key is
        // not listed in ProbeScheduleRawDcs. Resolves immediately to an empty
        // vector when no netmon urls are configured or when
        // ScheduledCrossDcProbesBetweenTwoPods is zero. A location whose request or
        // response parsing fails is logged and skipped; the others still contribute.
        NThreading::TFuture<TVector<TTopology::THostRef>> FetchCrossDcTargetHosts(const TTopologyStorage& topologyStorage) {
            const bool noLocations = TSettings::Get()->GetNetmonUrls().empty();
            if (noLocations || TSettings::Get()->GetScheduledCrossDcProbesBetweenTwoPods() == 0) {
                return NThreading::MakeFuture(TVector<TTopology::THostRef>());
            }

            TVector<THttpRequester::TFuture> pending;
            for (const auto& entry : TSettings::Get()->GetNetmonUrls()) {
                const auto& location = entry.first;
                const auto& replicaUrls = entry.second;
                if (IsIn(TSettings::Get()->GetProbeScheduleRawDcs(), location)) {
                    continue;
                }
                pending.emplace_back(RequestInterestedHostsFromLocation(replicaUrls));
            }

            // The continuation keeps its own copy of the futures and reads each
            // result once all of them have completed.
            return NThreading::WaitAll(pending).Apply([pending, &topologyStorage](const NThreading::TFuture<void>&) {
                TVector<TTopology::THostRef> collected;
                for (const auto& response : pending) {
                    try {
                        auto located = ParseInterestedHostsResponse(response.GetValue(), topologyStorage);
                        collected.reserve(collected.size() + located.size());
                        for (auto& host : located) {
                            collected.push_back(std::move(host));
                        }
                    } catch (...) {
                        ERROR_LOG << "Failed to fetch target hosts for cross-dc probe schedule: " << CurrentExceptionMessage() << Endl;
                    }
                }
                return collected;
            });
        }
    }

    // Scheduled task that periodically rebuilds the probe schedule.
    // Each run fetches cross-dc target hosts from other locations and then
    // executes Reschedule() on THeavyTaskThread. Between runs it accumulates
    // interested hosts and link poller statistics reported through
    // AddInterestedHost() / AddLinkPollerStats().
    class TProbeScheduleMaintainer::TImpl: public TScheduledTask {
    public:
        // Builds the switch probe scheduler and pre-creates an (empty) stable
        // host set for every datacenter that is mapped from a configured netmon
        // location and is present in the topology.
        TImpl(const TTopologyStorage& topologyStorage, const IHostsMaintainer& walleUpdater, const IHostsMaintainer& rtUpdater)
            // NOTE(review): the second TScheduledTask argument looks like a
            // timeout/deadline span (schedule TTL plus slack) — confirm.
            : TScheduledTask(TSettings::Get()->GetProbeRescheduleInterval(),
                             TSettings::Get()->GetProbeScheduleTtl() + SPARE_INTERVAL)
            , TopologyStorage(topologyStorage)
            , WalleUpdater(walleUpdater)
            , RtUpdater(rtUpdater)
            // The predicate accepts a switch when its line name passes the known
            // queues filter, its datacenter is allowed (full-mesh mode or listed
            // in ProbeScheduleDcs) and the switch is not explicitly excluded.
            , Scheduler(MakeHolder<TUniformSwitchProbeScheduler>(TopologyStorage, [](const TSwitch& switch_) {
                    const bool knownQueue = TTopologySettings::Get()->GetKnownQueuesFilter().Check(switch_.GetLine().GetName());
                    const bool datacenterIsValid = TSettings::Get()->IsCrossDcProbeScheduleFullMesh() ||
                                                   IsIn(TSettings::Get()->GetProbeScheduleDcs(), switch_.GetDatacenter().GetName());

                    return datacenterIsValid && knownQueue &&
                           !TSettings::Get()->GetProbeScheduleExcludedSwitches().contains(switch_.GetName());
                },
                TSettings::Get()->GetScheduledProbesBetweenTwoSwitches(),
                TSettings::Get()->GetScheduledCrossDcProbesBetweenTwoPods()))
            , ProbeSchedule(IProbeScheduler::TProbeSchedule())
            , NewInterestedHosts(TTopologyStorage::THostSet())
        {
            for (const auto& pair : TSettings::Get()->GetNetmonUrls()) {
                for (const auto& dcName : TTopologySettings::Get()->GetMappedDcs(pair.first)) {
                    // Datacenters unknown to the topology are skipped.
                    if (auto dc = TopologyStorage.FindDatacenter(dcName)) {
                        CrossDcTargets.emplace(dc->GetReducedId(), TStableHostSet());
                    }
                }
            }
        }

        // One iteration of the task: fetch cross-dc targets, then hand the
        // result (copied into the closure) to Reschedule() on THeavyTaskThread.
        TThreadPool::TFuture Run() override {
            return FutureChain(
                FetchCrossDcTargetHosts(TopologyStorage),
                [this](const TVector<TTopology::THostRef>& crossDcTargetHosts) {
                    return THeavyTaskThread::Get()->Add([this, crossDcTargetHosts]() {
                        Reschedule(crossDcTargetHosts);
                    });
                }
            );
        }

        // Returns the currently published probe schedule.
        inline TProbeScheduleRef GetSchedule() const noexcept {
            return ProbeSchedule.Get();
        }

        // Records a host for the next reschedule pass. Hosts reported before
        // the collection window opens (see IsTimeToCollectInterestedHosts)
        // are ignored.
        void AddInterestedHost(const TTopology::THostRef host) {
            if (IsTimeToCollectInterestedHosts()) {
                NewInterestedHosts.Own()->emplace(host);
            }
        }

        // Adds the given success/failure counts to the per-host link poller
        // statistics that GetMutedHosts() consumes.
        void AddLinkPollerStats(const TTopology::THostRef host, ui64 successCount, ui64 failCount) {
            // The object returned by Own() is kept for the whole update
            // (presumably it holds the box lock — confirm).
            auto linkPollerStats = LinkPollerStats.Own();
            // operator[] creates the entry on first use.
            auto& it = (*linkPollerStats)[host];
            it.Success += successCount;
            it.Failed += failCount;
        }

        // Returns the last interested-host set accepted by Reschedule().
        TTopologyStorage::THostSetRef GetInterestedHosts() const {
            return InterestedHosts.Own()->Get();
        }

    private:
        // True once the current time has reached
        // GetDeadline() - ProbeScheduleTtl - SPARE_INTERVAL.
        // GetDeadline() is not defined in this file (presumably inherited from
        // TScheduledTask).
        inline bool IsTimeToCollectInterestedHosts() {
            return (TInstant::Now() >= (GetDeadline() - TSettings::Get()->GetProbeScheduleTtl() - SPARE_INTERVAL));
        }

        // Builds the set of hosts to mute for the next schedule.
        // Consumes the statistics accumulated by AddLinkPollerStats(), mutes
        // hosts whose failure ratio reaches the configured threshold, and keeps
        // previously muted hosts in the set until their mute deadline passes.
        // Returns an empty set (without touching any state) when muting is
        // disabled by a zero threshold or a zero mute duration.
        inline TAtomicSharedPtr<TTopologyStorage::THostSet> GetMutedHosts() {
            auto mutedHosts(MakeAtomicShared<TTopologyStorage::THostSet>());

            if (TSettings::Get()->GetLinkPollerFailsThresholdToHostMute() == 0 ||
                !TSettings::Get()->GetLinkPollerHostMuteDuration()) {
                return mutedHosts;
            }

            // Take the accumulated statistics and leave a fresh empty map behind.
            auto linkPollerStats(MakeAtomicShared<LinkPollerResultsMap>());
            LinkPollerStats.Swap(linkPollerStats);

            auto now = TInstant::Now();

            // Forget hosts whose mute period has already expired.
            EraseNodesIf(HostMuteDeadlines, [now](const auto& kv) { return kv.second < now; });

            for (const auto& [host, stat] : *linkPollerStats) {
                const double total(stat.Failed + stat.Success);

                // total > 0 also guards the division below.
                if (total > 0 &&
                    total >= MINIMUM_LINK_POLLER_PROBES_TO_MUTE &&
                    stat.Failed / total >= TSettings::Get()->GetLinkPollerFailsThresholdToHostMute()) {
                    mutedHosts->emplace(host);

                    // Log only the first MUTED_HOSTS_COUNT_TO_LOG hosts by name.
                    if (mutedHosts->size() <= MUTED_HOSTS_COUNT_TO_LOG) {
                        INFO_LOG << "Mute host " << host->GetName() << Endl;
                    }

                    // Erase before re-inserting so that the map key becomes the
                    // current host reference; this is meant to prevent leaking
                    // an old topology (presumably a stale key would keep it alive).
                    HostMuteDeadlines.erase(host);
                    HostMuteDeadlines[host] = now + TSettings::Get()->GetLinkPollerHostMuteDuration();
                }
            }

            // Hosts muted in earlier passes stay muted until their deadline.
            for (const auto& it : HostMuteDeadlines) {
                const auto& host = it.first;

                if (!mutedHosts->contains(host)) {
                    mutedHosts->emplace(host);
                }
            }

            return mutedHosts;
        }

        // Groups hosts into per-datacenter sets keyed by the datacenter's
        // reduced id.
        static THashMap<ui64, TAtomicSharedPtr<TTopologyStorage::THostSet>>
        GroupHostsByDc(const TVector<TTopology::THostRef>& hosts) {
            THashMap<ui64, TAtomicSharedPtr<TTopologyStorage::THostSet>> hostsByDc;

            for (const auto& host : hosts) {
                auto dcId = host->GetDatacenter().GetReducedId();
                auto it = hostsByDc.find(dcId);
                if (it.IsEnd()) {
                    // First host of this datacenter: create its set.
                    auto [insertedIt, _] = hostsByDc.emplace(dcId, MakeAtomicShared<TTopologyStorage::THostSet>());
                    it = insertedIt;
                }
                it->second->insert(host);
            }
            return hostsByDc;
        }

        // Rebuilds and publishes the probe schedule.
        // Cross-dc target sets are updated first (each guarded by its own
        // TStableHostSet); the schedule itself is rebuilt only when the newly
        // collected interested-host set is accepted by InterestedHosts.
        inline void Reschedule(const TVector<TTopology::THostRef>& crossDcTargetHosts) {
            auto newCrossDcTargets = GroupHostsByDc(crossDcTargetHosts);
            for (auto& [dcId, hosts] : CrossDcTargets) {
                auto dc = TopologyStorage.FindDatacenter(dcId);
                Y_VERIFY(dc);

                // operator[] yields a null pointer for datacenters without
                // fetched hosts; treat that as an empty set.
                auto newHosts = newCrossDcTargets[dcId];
                if (!newHosts) {
                    newHosts = MakeAtomicShared<TTopologyStorage::THostSet>();
                }
                if (!hosts.TryReplace(newHosts)) {
                    INFO_LOG << "Cross-dc probe targets in " << dc->GetName() << " not updated" << Endl;
                }
                INFO_LOG << "Got " << hosts.Get()->size() << " probe targets in " << dc->GetName() << Endl;
            }

            // Take the hosts collected since the previous pass and start a new,
            // empty collection.
            auto interestedHosts(MakeAtomicShared<TTopologyStorage::THostSet>());
            NewInterestedHosts.Swap(interestedHosts);
            // Publish the size of the just-collected set, whether or not it
            // gets accepted below.
            AtomicSet(InterestedHostsCount, interestedHosts->size());

            if (InterestedHosts.Own()->TryReplace(interestedHosts)) {
                TTopologyStorage::THostSet targets;
                for (const auto& pair : CrossDcTargets) {
                    auto hosts = pair.second.Get();
                    targets.insert(begin(*hosts), end(*hosts));
                }
                // Include interestedHosts in cross-dc targets, because we may
                // need to schedule probes between ProbeScheduleDcs.
                targets.insert(begin(*interestedHosts), end(*interestedHosts));

                const auto mutedHosts(GetMutedHosts());

                INFO_LOG << "Probe rescheduling for " << interestedHosts->size()
                         << " interested hosts, " << targets.size()
                         << " cross-dc hosts, " << mutedHosts->size()
                         << " muted hosts started..." << Endl;
                TUnistatTimer timer{TUnistat::Instance(), ENetmonSignals::ProbeSchedulingTime};

                // Dead hosts are the union of what both maintainers report.
                TTopologyStorage::THostSet deadHosts;
                const auto walleDeadHosts(WalleUpdater.GetHosts());
                deadHosts.insert(walleDeadHosts->begin(), walleDeadHosts->end());
                const auto rtDeadhosts(RtUpdater.GetHosts());
                deadHosts.insert(rtDeadhosts->begin(), rtDeadhosts->end());

                // Build the new schedule and swap it in as the published one.
                auto newProbeSchedule(MakeAtomicShared<IProbeScheduler::TProbeSchedule>(
                    Scheduler->Schedule(*interestedHosts, deadHosts, targets, *mutedHosts)
                ));
                ProbeSchedule.Swap(newProbeSchedule);

                INFO_LOG << "Probe rescheduling finished" << Endl;
            } else {
                // Don't schedule probes immediately after aggregator starts up,
                // wait until we've discovered all interested hosts in our location,
                // i.e. InterestedHosts isn't changing quickly.
                //
                // First iteration always gets skipped. It's fine because agents
                // often take longer than 5 minutes to discover a new backend.
                INFO_LOG << "Interested hosts not updated" << Endl;
                INFO_LOG << "Probe rescheduling skipped" << Endl;
            }
        }

        const TTopologyStorage& TopologyStorage;
        // Sources of dead hosts that are passed to the scheduler.
        const IHostsMaintainer& WalleUpdater;
        const IHostsMaintainer& RtUpdater;
        THolder<IProbeScheduler> Scheduler;
        // Currently published schedule.
        TProbeScheduleBox ProbeSchedule;

        // Hosts reported since the previous reschedule pass.
        TTopologyStorage::THostSetBox NewInterestedHosts;
        // Last accepted interested-host set.
        TPlainLockedBox<TStableHostSet> InterestedHosts;
        // Cross-dc probe targets keyed by datacenter reduced id.
        THashMap<ui64, TStableHostSet> CrossDcTargets;

        // Link poller counters accumulated for a single host.
        struct LinkProbeResult {
            ui64 Success, Failed;
        };

        using LinkPollerResultsMap = THashMap<TTopology::THostRef, LinkProbeResult>;
        // Statistics accumulated since the previous GetMutedHosts() call.
        TAtomicLockedBox<LinkPollerResultsMap> LinkPollerStats;
        // Instant until which each muted host stays muted; only accessed from
        // GetMutedHosts().
        THashMap<TTopology::THostRef, TInstant> HostMuteDeadlines;
    };

    // Creates the implementation and, when ProbeRescheduleInterval is non-zero,
    // starts its periodic execution via Impl->Schedule(); otherwise
    // SchedulerGuard is initialized from nullptr and nothing is scheduled.
    // NOTE(review): SchedulerGuard's initializer dereferences Impl, which
    // requires Impl to be declared before SchedulerGuard — confirm in the header.
    TProbeScheduleMaintainer::TProbeScheduleMaintainer(const TTopologyStorage& topologyStorage, const IHostsMaintainer& walleUpdater, const IHostsMaintainer& rtUpdater)
        : Impl(MakeHolder<TImpl>(topologyStorage, walleUpdater, rtUpdater))
        , SchedulerGuard(TSettings::Get()->GetProbeRescheduleInterval() ? Impl->Schedule() : nullptr)
    {
    }

    // Defaulted here, after TImpl has been defined in this file; presumably
    // required so that the holder of Impl can destroy a complete type.
    TProbeScheduleMaintainer::~TProbeScheduleMaintainer() = default;

    // Returns the currently published probe schedule (forwards to TImpl).
    TProbeScheduleMaintainer::TProbeScheduleRef TProbeScheduleMaintainer::GetSchedule() const noexcept {
        return Impl->GetSchedule();
    }

    // Reports a host as interested; the implementation decides whether it is
    // recorded for the next reschedule pass.
    void TProbeScheduleMaintainer::AddInterestedHost(const TTopology::THostRef host) {
        Impl->AddInterestedHost(host);
    }

    // Adds link poller success/failure counts for a host to the statistics kept
    // by the implementation.
    void TProbeScheduleMaintainer::AddLinkPollerStats(const TTopology::THostRef host, ui64 successCount, ui64 failCount) {
        Impl->AddLinkPollerStats(host, successCount, failCount);
    }

    // Returns the last accepted set of interested hosts (forwards to TImpl).
    TTopologyStorage::THostSetRef TProbeScheduleMaintainer::GetInterestedHosts() const {
        return Impl->GetInterestedHosts();
    }

    // Forwards to Impl->SpinAndWait(), which is not defined in this file
    // (presumably inherited from TScheduledTask) and returns its future.
    TThreadPool::TFuture TProbeScheduleMaintainer::SpinAndWait() noexcept {
        return Impl->SpinAndWait();
    }
}
