#include "shard_resolver.h"

#include <solomon/services/fetcher/lib/fetcher_shard.h>
#include <solomon/services/fetcher/lib/app_data.h>
#include <solomon/services/fetcher/lib/host_list_cache/host_list_cache.h>

#include <solomon/libs/cpp/actors/events/events.h>
#include <solomon/libs/cpp/logging/logging.h>

#include <library/cpp/actors/core/actor_bootstrapped.h>
#include <library/cpp/actors/core/actorsystem.h>
#include <library/cpp/actors/core/hfunc.h>
#include <library/cpp/actors/core/log.h>
#include <library/cpp/monlib/metrics/metric_registry.h>
#include <library/cpp/monlib/metrics/histogram_collector.h>
#include <library/cpp/monlib/metrics/timer.h>

#include <util/digest/sequence.h>
#include <util/generic/algorithm.h>
#include <util/system/hp_timer.h>

#include <utility>

namespace NSolomon::NFetcher {
namespace {
    using namespace NActors;
    using namespace NThreading;
    using namespace NMonitoring;

    // Timeout handed to every TClusterResolverActor spawned in this file: after this long
    // the resolver actor answers with whatever it has collected so far.
    constexpr auto RESOLVE_TIMEOUT = TDuration::Minutes(1);
    // Base delay between two periodic resolves of a cluster; a random jitter of up to half
    // of this value is added on top when the next wakeup is scheduled.
    constexpr auto RELOAD_INTERVAL = TDuration::Seconds(30);

    // Minimum time that must pass since the last host list cache write before
    // TClusterActor::UpdateCache writes again.
    constexpr auto CACHE_UPDATE_THRESHOLD = TDuration::Minutes(5);

    // Events that are only exchanged between the actors defined in this file.
    struct TLocalEvents: private TPrivateEvents {
        enum {
            EvRequestCompleted = SpaceBegin,
            EvResolveCompleted,
            EvClusterChanged,
            End,
        };
        static_assert(End < SpaceEnd, "too many event types");

        // Outcome of a single resolver's Resolve() call, paired with the resolver that produced it
        // so the handler can file the result under the right key.
        struct TEvResolveRequestCompleted: TEventLocal<TEvResolveRequestCompleted, EvRequestCompleted> {
            explicit TEvResolveRequestCompleted(TResolveResult&& outcome, IHostGroupResolverPtr source)
                : Result{std::move(outcome)}
                , Resolver{std::move(source)}
            {
            }

            TResolveResult Result;
            IHostGroupResolverPtr Resolver;
        };

        // Tells a cluster actor that a (possibly) new version of its cluster is available.
        struct TEvClusterChanged: TEventLocal<TEvClusterChanged, EvClusterChanged> {
            explicit TEvClusterChanged(IFetcherClusterPtr updated)
                : Cluster{std::move(updated)}
            {
            }

            IFetcherClusterPtr Cluster;
        };
    };

    /**
     * One-shot actor that runs the host group resolvers of a cluster, gathers their answers
     * and reports them to the receiver as a single TEvClusterResolved.
     *
     * The actor answers and dies as soon as every request it started has completed, or when
     * the timeout fires, whichever comes first. A request that has not completed by the time
     * the answer is sent is reported with the "Request timed out" error stored for it up front.
     */
    class TClusterResolverActor: public TActorBootstrapped<TClusterResolverActor> {
    public:
        /**
         * @param receiver   actor that gets the final TEvClusterResolved
         * @param clusterId  id echoed back in the answer
         * @param resolvers  resolvers to run; each starts with an uninitialized result
         * @param timeout    maximum time to wait; TDuration::Max() disables the timeout
         */
        TClusterResolverActor(TActorId receiver, TClusterId clusterId, TVector<IHostGroupResolverPtr> resolvers, TDuration timeout)
            : Receiver_{receiver}
            , ClusterId_{std::move(clusterId)}
            , Timeout_{timeout}
        {
            for (auto& resolver: resolvers) {
                Resolvers_.emplace(std::move(resolver), TClusterResolveResult{});
            }
        }

        /**
         * Variant that accepts a prefilled result map. Entries whose result is already
         * initialized are passed through to the answer untouched; only the rest are resolved.
         * The cluster id of the answer is default-constructed.
         */
        // NOLINTNEXTLINE(performance-unnecessary-value-param): false positive
        TClusterResolverActor(TActorId receiver, TResolveResults resolvers, TDuration timeout)
            : Receiver_{receiver}
            , Timeout_{timeout}
        {
            for (auto& [resolver, result]: resolvers) {
                Resolvers_.emplace(resolver, std::move(result));
            }
        }

        void Bootstrap(const TActorContext& ctx) {
            if (Resolvers_.empty()) {
                RespondAndDie();
                return;
            }

            Become(&TThis::StateWaiting);

            // Count only the requests that were really started. Entries with a prefilled result
            // are skipped by StartResolver and will never produce a completion event, so counting
            // them would keep the actor waiting until the timeout (or forever without one).
            for (auto it = Resolvers_.begin(); it != Resolvers_.end(); ++it) {
                if (StartResolver(it, ctx)) {
                    ++Inflight_;
                }
            }

            if (Inflight_ == 0) {
                // everything was already resolved, nothing to wait for
                RespondAndDie();
                return;
            }

            if (Timeout_ != TDuration::Max()) {
                Schedule(Timeout_, new TEvents::TEvPoison);
            }
        }

        STATEFN(StateWaiting) {
            switch (ev->GetTypeRewrite()) {
                hFunc(TLocalEvents::TEvResolveRequestCompleted, OnCompleted);
                hFunc(TEvents::TEvPoison, OnPoisonPill);
            }
        }

        // Stores the outcome of one resolver and answers once the last request is in.
        void OnCompleted(const TLocalEvents::TEvResolveRequestCompleted::TPtr& ev) {
            auto& completed = *ev->Get();
            auto& result = completed.Result;
            if (result.Success()) {
                THashSet<THostAndLabels> hls;
                auto val = result.Extract();
                Copy(std::make_move_iterator(val.begin()),
                    std::make_move_iterator(val.end()),
                    std::inserter(hls, hls.end())
                );

                // the local set is not needed afterwards, so hand it over instead of copying
                Resolvers_[completed.Resolver] = {std::move(hls)};
            } else {
                Resolvers_[completed.Resolver] = result.PassError<THashSet<THostAndLabels>>();
            }

            --Inflight_;
            if (Inflight_ == 0) {
                RespondAndDie();
            }
        }

        // Handles both the self-scheduled timeout and an external stop request:
        // answers with what has been collected so far and dies.
        void OnPoisonPill(const TEvents::TEvPoison::TPtr&) {
            if (Inflight_ > 0) {
                MON_WARN(ShardResolver, "Killing shard resolver actor while " << Inflight_ << " tasks are still running");
            }

            RespondAndDie();
        }

    private:
        // Sends the collected results (moved out of this actor) to the receiver and terminates.
        void RespondAndDie() {
            Send(Receiver_, new TEvClusterResolved{
                ClusterId_,
                MakeAtomicShared<TResolveResults>(std::move(Resolvers_))
            });

            PassAway();
        }

    private:
        /**
         * Starts the asynchronous resolve for one map entry.
         *
         * @return true if a request was started (a completion event will arrive later),
         *         false if the entry already holds a result and was left untouched.
         */
        bool StartResolver(TResolveResults::iterator it, const TActorContext& ctx) {
            if (it->second.IsInitialized()) {
                return false;
            }

            const auto self = SelfId();

            // Pessimistic default: this is what gets reported if the answer is sent
            // before the request completes.
            it->second = TClusterResolveResult::FromError("Request timed out");
            auto& r = it->first;

            // The callback runs outside of this actor, so it only captures copies and
            // talks back to the actor through the actor system.
            auto* as = ctx.ExecutorThread.ActorSystem;
            r->Resolve().Subscribe([self, as, r] (auto f) {
                auto result = f.ExtractValueSync();
                as->Send(self, new TLocalEvents::TEvResolveRequestCompleted{std::move(result), r});
            });

            return true;
        }

    private:
        TResolveResults Resolvers_;
        TActorId Receiver_;
        TClusterId ClusterId_;
        // number of started requests that have not reported back yet
        ui32 Inflight_ = 0;
        TDuration Timeout_;
    };


    /**
     * Long-lived actor that keeps a single cluster resolved.
     *
     * On start it builds the resolvers for the cluster, launches a resolve round and, when a
     * host list cache is configured, asks the cache for previously stored host lists. Every
     * finished round is forwarded to the receiver as TEvClusterResolved and the next round is
     * scheduled after RELOAD_INTERVAL plus a random jitter. A TEvWakeup triggers a round at
     * any time; if a round is already running, one more is started right after it finishes.
     */
    class TClusterActor: public TActorBootstrapped<TClusterActor>, private TPrivateEvents {
        enum {
            EvCacheLoaded = SpaceBegin,
            End,
        };
        static_assert(End < SpaceEnd, "too many event types");

        // Delivered to this actor when the host list cache lookup has finished.
        struct TEvCacheLoaded: public NActors::TEventLocal<TEvCacheLoaded, EvCacheLoaded> {
            DEFINE_SIMPLE_LOCAL_EVENT(TEvCacheLoaded, "TEvCacheLoaded");
            TEvCacheLoaded(TVector<THostListCacheItem> items)
                : Items{std::move(items)}
            {
            }

            TVector<THostListCacheItem> Items;
        };

    public:
        /**
         * @param registry  metric registry (currently unused by this actor)
         * @param cluster   cluster to keep resolved
         * @param factory   factory used to create the host group resolvers
         * @param receiver  actor that gets every TEvClusterResolved
         * @param cache     optional host list cache; when null, caching is skipped
         */
        TClusterActor(IMetricRegistry& registry, IFetcherClusterPtr cluster, IHostResolverFactoryPtr factory, TActorId receiver, IHostListCachePtr cache = {})
            : Registry_{registry}
            , Cluster_{std::move(cluster)}
            , ClusterId_{Cluster_->Id()}
            , Receiver_{receiver}
            , HostListCache_{std::move(cache)}
            , ResolverFactory_{std::move(factory)}
        {
            Y_UNUSED(Registry_);
        }

        void Bootstrap() {
            Become(&TThis::StateWork);
            UpdateResolvers();
            Resolve();
            LoadFromCache();
        }

        STFUNC(StateWork) {
            Y_UNUSED(ctx);

            switch (ev->GetTypeRewrite()) {
                hFunc(TEvClusterResolved, OnCompleted);
                sFunc(TEvents::TEvWakeup, Resolve);
                hFunc(TEvents::TEvPoison, OnPoison);
                hFunc(TEvCacheLoaded, OnCacheLoaded);
                hFunc(TLocalEvents::TEvClusterChanged, OnClusterChanged);
            }
        }

    private:
        // Replaces the cluster and rebuilds the resolvers, but only if the new cluster differs.
        void OnClusterChanged(const TLocalEvents::TEvClusterChanged::TPtr& ev) {
            auto&& cluster = ev->Get()->Cluster;
            if (!cluster->Equals(*Cluster_)) {
                Cluster_ = cluster;
                UpdateResolvers();
            }
        }

        // A resolve round has finished: store the results in the cache, forward them to the
        // receiver and either start the postponed round or schedule the next periodic one.
        void OnCompleted(const TEvClusterResolved::TPtr& ev) {
            Resolved_ = true;
            Resolver_ = {};

            auto& data = *ev->Get();

            // Write the fresh results to the host list cache. This has to happen before the
            // result pointer is moved into the outgoing event. UpdateCache rate-limits itself
            // and is a no-op when no cache is configured.
            if (data.Result) {
                UpdateCache(*data.Result);
            }

            Send(Receiver_, new TEvClusterResolved{Cluster_->Id(), std::move(data.Result)});

            if (NeedContinue_) {
                DoResolve();
                return;
            }

            auto jitter = TDuration::MilliSeconds(RandomNumber(RELOAD_INTERVAL.MilliSeconds() / 2));
            Schedule(RELOAD_INTERVAL + jitter, new TEvents::TEvWakeup);
        }

        // Rebuilds Resolvers_ from the current cluster. Resolvers that fail to be created
        // are logged and skipped.
        void UpdateResolvers() {
            Resolvers_.clear();
            auto result = Cluster_->CreateResolvers(*ResolverFactory_);

            for (auto&& r: result) {
                if (!r.Success()) {
                    MON_WARN(ShardResolver, r.Error().Message());
                    continue;
                }
                Resolvers_.push_back(r.Extract());
            }
        }

        // Starts a resolve round, or remembers that one more is wanted if a round is running.
        void Resolve() {
            if (Resolver_) {
                MON_INFO(ShardResolver, "Trying to start multiple resolve requests for cluster " << ClusterId_);
                NeedContinue_ = true;
                return;
            }

            DoResolve();
        }

        void DoResolve() {
            NeedContinue_ = false;
            Resolver_ = Register(new TClusterResolverActor{SelfId(), ClusterId_, Resolvers_, RESOLVE_TIMEOUT});
        }

        // Asks the host list cache for the entries of all resolvers that have a cache key.
        // The answer comes back to this actor as TEvCacheLoaded; a failed lookup is only logged.
        void LoadFromCache() {
            if (HostListCache_ == nullptr) {
                return;
            }

            TVector<TString> cacheKeys;

            for (auto&& r: Resolvers_) {
                if (!r->CacheKey()) {
                    continue;
                }

                auto key = *r->CacheKey();
                cacheKeys.push_back(key);
            }

            auto* as = TActorContext::ActorSystem();

            // The continuation may run outside of this actor, so it captures only values
            // and reports back through the actor system.
            HostListCache_->Find(cacheKeys).Apply([self = SelfId(), as, clusterId = ClusterId_.ToString()] (auto&& f) {
                try {
                    as->Send(self, new TEvCacheLoaded{f.GetValue()});
                } catch (...) {
                    MON_ERROR_C(*as, ShardResolver, "Failed to load host list cache for cluster "
                            << clusterId << ": " << CurrentExceptionMessage());
                }
            });
        }

        bool IsCacheTooOld() const {
            return TInstant::Now() - LastCacheUpdate_ >= CACHE_UPDATE_THRESHOLD;
        }

        // Stops the running resolve round (if any) and terminates.
        void OnPoison(const TEvents::TEvPoison::TPtr&) {
            MON_INFO(ShardResolver, "Stop resolving cluster " << ClusterId_);
            if (Resolver_) {
                Send(Resolver_, new TEvents::TEvPoison);
            }
            PassAway();
        }

        // Publishes cached host lists to the receiver, but only while no real resolve round
        // has completed yet: once fresh data exists, the cached data is ignored.
        void OnCacheLoaded(const TEvCacheLoaded::TPtr& ev) {
            if (Resolved_) {
                return;
            }

            auto&& result = MakeAtomicShared<TResolveResults>();

            for (auto& cacheItem: ev->Get()->Items) {
                auto it = std::find_if(Resolvers_.begin(), Resolvers_.end(), [&] (auto&& r) {
                    return r->CacheKey() == cacheItem.Id;
                });

                Y_VERIFY_DEBUG(it != Resolvers_.end());
                if (Y_UNLIKELY(it == Resolvers_.end())) {
                    MON_ERROR(ShardResolver, "Cache key " << cacheItem.Id << " loaded from cache, but missing in local collection");
                    continue;
                }

                (*result)[*it] = THashSet<THostAndLabels>(
                    std::make_move_iterator(cacheItem.Value.begin()),
                    std::make_move_iterator(cacheItem.Value.end())
                );
            }

            Send(Receiver_, new TEvClusterResolved{Cluster_->Id(), std::move(result)});
        }

        // Writes every successful result that has a cache key to the host list cache.
        // Does nothing without a cache or when the previous write happened less than
        // CACHE_UPDATE_THRESHOLD ago. Write failures are only logged.
        void UpdateCache(const TResolveResults& resolveResults) {
            if (!HostListCache_ || !IsCacheTooOld()) {
                return;
            }

            const auto now = TInstant::Now();
            LastCacheUpdate_ = now;

            // the same for every entry, so look it up once
            auto* as = TActivationContext::AsActorContext().ExecutorThread.ActorSystem;

            for (auto&& [resolver, result]: resolveResults) {
                if (!result.Success() || !resolver->CacheKey()) {
                    continue;
                }

                // only read below, so no copy of the whole host set is needed
                const auto& hosts = result.Value();

                HostListCache_->InsertOrUpdate({
                    *resolver->CacheKey(),
                    {hosts.begin(), hosts.end()},
                    now,
                }).Subscribe([as, cacheKey = resolver->CacheKey()] (auto&& f) {
                    if (!f.HasException()) {
                        return;
                    }

                    try {
                        f.GetValue();
                    } catch (...) {
                        MON_WARN_C(*as, FetcherShard, "Error while updating cache for "
                                << cacheKey << ": " << CurrentExceptionMessage());
                    }
                });
            }

        }

    private:
        IMetricRegistry& Registry_;
        IFetcherClusterPtr Cluster_;
        TClusterId ClusterId_;
        TVector<IHostGroupResolverPtr> Resolvers_;
        TActorId Receiver_;
        // id of the running TClusterResolverActor, empty when no round is in progress
        TActorId Resolver_;
        IHostListCachePtr HostListCache_;
        TInstant LastCacheUpdate_;
        // set once the first real resolve round has completed
        bool Resolved_{false};
        IHostResolverFactoryPtr ResolverFactory_;
        // a resolve was requested while another one was running
        bool NeedContinue_{false};
    };

    class TClusterManagerCounters {
    public:
        TClusterManagerCounters(IMetricRegistry& registry) {
            Tasks_ = registry.IntGauge(MakeLabels({{"sensor", "clusterManager.taskCount"}}));
        }

        void AddTask() {
            Tasks_->Inc();
        }

        void RemoveTask() {
            Tasks_->Dec();
        }

    private:
        IIntGauge* Tasks_;
    };

    /**
     * Entry point for cluster resolve requests.
     *
     * Keeps one TClusterActor ("task") per subscribed cluster, remembers the latest result of
     * every task and fans it out to the task's subscribers. It also serves one-off resolve
     * requests and forwards cluster changes to the matching task.
     */
    class TClusterConfigManager: public TActorBootstrapped<TClusterConfigManager> {
        struct TTask {
            // the TClusterActor that resolves this cluster
            TActorId Actor;
            // actors that receive every new result
            THashSet<TActorId> Subscribers;
            // latest known result, null until the first one arrives
            TResolveResultsPtr Groups;
        };

    public:
        TClusterConfigManager(IMetricRegistry& registry, IHostResolverFactoryPtr factory, IHostListCachePtr cache)
            : Registry_{registry}
            , Counters_{Registry_}
            , Cache_{std::move(cache)}
            , ResolverFactory_{std::move(factory)}
        {
        }

        void Bootstrap(const TActorContext&) {
            Send(MakeConfigUpdaterId(), new TEvents::TEvSubscribe);
            Become(&TThis::StateWork);
        }

        STATEFN(StateWork) {
            switch (ev->GetTypeRewrite()) {
                hFunc(TEvResolveCluster, OnResolveCluster);
                hFunc(TEvClusterResolved, OnResolved);
                hFunc(TEvClusterUnsubscribe, OnUnsubscribe);
                hFunc(TEvClustersChanged, OnClustersChanged);
                cFunc(TEvents::TSystem::PoisonPill, OnPoison);
            }
        }

        // A task reported a new result: remember it and pass it on to all subscribers.
        void OnResolved(const TEvClusterResolved::TPtr& evPtr) {
            auto& ev = *evPtr->Get();
            auto&& id = ev.Id;

            if (auto* task = Tasks_.FindPtr(id)) {
                task->Groups = ev.Result;

                for (auto&& s: task->Subscribers) {
                    Send(s, new TEvClusterResolved{ev.Id, ev.Result});
                }
            } else {
                MON_WARN(ShardResolver, "Task with id " << id.ToString() << " not found");
            }
        }

        // Subscribes the receiver to a cluster, spawning the task if it does not exist yet.
        void ResolveContinuous(TEvResolveCluster& ev, TActorId receiver) {
            auto&& id = ev.Cluster->Id();
            Y_VERIFY_DEBUG(id.IsValid(), "Cluster without a valid id/project id cannot be scheduled for resolve");

            TTask* task = Tasks_.FindPtr(id);

            // we may already know something about this cluster or a request to resolve it is underway
            // if the latter is the case, groups are still empty and we'll just add the requesting actor to subscribers
            // and notify it once request is completed
            if (task && task->Groups) {
                Send(receiver, new TEvClusterResolved{id, task->Groups});
            } else if (!task) {
                task = CreateTask(ev.Cluster);
            }

            task->Subscribers.emplace(receiver);
        }

        // Asks an existing task to resolve right now; unknown clusters are only logged.
        void ForcedResolve(TEvResolveCluster& ev) {
            auto&& id = ev.Cluster->Id();
            auto* task = Tasks_.FindPtr(id);

            if (task) {
                Send(task->Actor, new TEvents::TEvWakeup);
            } else {
                MON_WARN(ShardResolver, "Task with id " << id.ToString() << " not found");
            }
        }

        // Answers a one-off request: from the stored result of a task when a non-empty one
        // exists, otherwise by spawning a TClusterResolverActor that answers the receiver directly.
        void ResolveOnce(TEvResolveCluster& ev, TActorId receiver) {
            auto&& id = ev.Cluster->Id();

            if (!id.IsValid()) {
                // skip cache search, just resolve
            } else if (auto* task = Tasks_.FindPtr(id); task && task->Groups && !task->Groups->empty()) {
                Send(receiver, new TEvClusterResolved{id, task->Groups});
                return;
            }

            TVector<IHostGroupResolverPtr> resolvers;
            for (auto& res: ev.Cluster->CreateResolvers(*ResolverFactory_)) {
                if (res.Success()) {
                    resolvers.push_back(res.Extract());
                    continue;
                }

                MON_WARN(ShardResolver, "Error while trying to create host group: " << res.Error().Message());
            }

            Register(new TClusterResolverActor(receiver, id, std::move(resolvers), RESOLVE_TIMEOUT));
        }

        // Dispatches a resolve request according to its subscription type.
        void OnResolveCluster(const TEvResolveCluster::TPtr& evPtr) {
            auto& ev = *evPtr->Get();
            switch (ev.SubscriptionType) {
                case TEvResolveCluster::ESubscriptionType::Once:
                    ResolveOnce(ev, evPtr->Sender);
                    break;
                case TEvResolveCluster::ESubscriptionType::Subscribe: {
                    ResolveContinuous(ev, evPtr->Sender);
                    break;
                }
                case TEvResolveCluster::ESubscriptionType::Forced: {
                    ForcedResolve(ev);
                    break;
                }
            };
        }

        // Removes the sender from the cluster's subscribers; the task is stopped and
        // forgotten once nobody is subscribed any more.
        void OnUnsubscribe(const TEvClusterUnsubscribe::TPtr& ev) {
            const auto clusterId = ev->Get()->Id;
            if (auto it = Tasks_.find(clusterId); it != Tasks_.end()) {
                auto&& task = it->second;
                task.Subscribers.erase(ev->Sender);

                if (task.Subscribers.empty()) {
                    Send(task.Actor, new TEvents::TEvPoisonPill);
                    Tasks_.erase(it);
                    MON_WARN(ShardResolver, "Cluster " << clusterId.ToString() << " will no longer be resolved");
                    Counters_.RemoveTask();
                }
            }
        }

        // Forwards every changed cluster to its task; clusters without a task are ignored.
        void OnClustersChanged(const TEvClustersChanged::TPtr& ev) {
            auto& changedClusters = ev->Get()->Clusters;

            for (auto&& cluster: changedClusters) {
                if (auto* task = Tasks_.FindPtr(cluster->Id())) {
                    Send(task->Actor, new TLocalEvents::TEvClusterChanged{cluster});
                }
            }
        }

    private:
        // Shuts the manager down. The task actors are stopped first: they reschedule
        // themselves periodically and would otherwise keep running and sending results
        // to an actor that no longer exists.
        void OnPoison() {
            for (auto& entry: Tasks_) {
                Send(entry.second.Actor, new TEvents::TEvPoisonPill);
                Counters_.RemoveTask();
            }
            Tasks_.clear();

            PassAway();
        }

        // Registers a new task for the cluster and spawns its TClusterActor.
        // If a task with this id already exists, it is returned unchanged.
        TTask* CreateTask(IFetcherClusterPtr cluster) {
            auto [it, ok] = Tasks_.emplace(cluster->Id(), TTask{});
            Y_VERIFY_DEBUG(ok, "Task with id %s", cluster->Id().ToString().c_str());

            if (!ok) {
                return &it->second;
            }

            MON_INFO(ShardResolver, "Spawning task for cluster " << cluster->Id().ToString());

            auto* task = &it->second;
            task->Actor = Register(
                new TClusterActor{Registry_, std::move(cluster), ResolverFactory_, SelfId(), Cache_}
            );

            Counters_.AddTask();

            return task;
        }

    private:
        THashMap<TClusterId, TTask> Tasks_;
        IMetricRegistry& Registry_;
        TClusterManagerCounters Counters_;
        IHostListCachePtr Cache_;
        IHostResolverFactoryPtr ResolverFactory_;
    };
} // namespace

    // Declaration only: the definition is not part of this file and nothing in this file calls it.
    TVector<TErrorOr<IHostGroupResolverPtr, TGenericError>> CreateResolversForCluster(const NDb::NModel::TClusterConfig& cluster, TAppData& appData);

    // Creates a one-shot actor that runs the given resolvers and sends the combined result
    // to `receiver`. No cluster id is supplied, so the answer carries a default-constructed one.
    IActor* ResolveCluster(TVector<IHostGroupResolverPtr> resolvers, TActorId receiver, TDuration timeout) {
        TClusterId noClusterId{};
        auto* actor = new TClusterResolverActor{receiver, std::move(noClusterId), std::move(resolvers), timeout};
        return actor;
    }

    // Creates the actor that keeps `cluster` resolved and reports every result to `receiver`.
    // No host list cache is passed, so the actor runs without one.
    IActor* CreateClusterActor(IMetricRegistry& registry, IFetcherClusterPtr cluster, IHostResolverFactoryPtr factory, TActorId receiver) {
        auto* actor = new TClusterActor{registry, std::move(cluster), std::move(factory), receiver};
        return actor;
    }

    // Creates the manager actor that owns the per-cluster tasks and serves resolve requests.
    IActor* CreateClusterManager(IMetricRegistry& registry, IHostResolverFactoryPtr factory, IHostListCachePtr cache) {
        auto* manager = new TClusterConfigManager{registry, std::move(factory), std::move(cache)};
        return manager;
    }
} // namespace NSolomon::NFetcher
