#pragma once

#include <infra/netmon/library/memory_pool.h>
#include <infra/netmon/library/helpers.h>
#include <infra/netmon/library/fences.h>
#include <infra/netmon/library/boxes.h>
#include <infra/netmon/library/thread_pool.h>
#include <infra/netmon/library/clickhouse/client.h>
#include <infra/netmon/library/settings.h>
#include <infra/netmon/library/metrics.h>

#include <library/cpp/consistent_hashing/consistent_hashing.h>
#include <library/cpp/containers/intrusive_avl_tree/avltree.h>
#include <util/datetime/cputimer.h>
#include <util/draft/holder_vector.h>
#include <util/generic/string.h>
#include <util/generic/xrange.h>
#include <util/string/builder.h>
#include <util/system/event.h>
#include <util/system/spinlock.h>
#include <util/system/thread.h>
#include <util/thread/lfqueue.h>

#include <atomic>
#include <functional>

namespace NNetmon {
    // Counter of TBatchRecord instances: incremented in TBatchRecord::Make() and
    // decremented in ~TBatchRecord(). Only declared here; the definition lives elsewhere.
    extern TAtomic MemPoolStatsBatchCount;

    namespace {
        // A record whose generation time is older than this is treated as expired
        // (see TBatchRecord::IsExpired) and is dropped instead of being dispatched.
        const TDuration RECORD_TTL(TDuration::Seconds(300));
        // A non-empty batch becomes ready once this much time has passed since
        // its creation, even if it has not reached its target size.
        const TDuration BATCH_TTL(TDuration::Seconds(5));

        // TRecordDumper::IsReady() reports "not ready" while some shard has more than
        // ERROR_LIMIT failures in a row and the latest one happened within
        // RELAXATION_INTERVAL.
        const ui64 ERROR_LIMIT(5);
        const TDuration RELAXATION_INTERVAL(TDuration::Seconds(10));

        // Delay before records of a failed batch are put back into batches.
        const TDuration RETRY_TIMEOUT(TDuration::Seconds(5));

        /**
         * THolderVector that can additionally drop ownership of all of its elements
         * at once. The per-index Release(size_t) of the base class stays available.
         */
        template <class T>
        class TDumperHolderVector: public THolderVector<T> {
        public:
            using THolderVector<T>::THolderVector;
            using THolderVector<T>::Release;

            // Overwrites every slot with nullptr without destroying the pointee.
            // The vector keeps its size; the caller must already have handed the
            // pointers to another owner.
            void Release() {
                const size_t total = this->Size();
                for (size_t pos = 0; pos != total; ++pos) {
                    (*this)[pos] = nullptr;
                }
            }
        };

        /**
         * Lock-free queue of raw T* that hands elements out as owning T::TRef
         * holders and destroys whatever is still queued when the queue itself dies.
         */
        template <class T>
        class TDumperLockFreeQueue: public TLockFreeQueue<T*> {
            using TRawQueue = TLockFreeQueue<T*>;

        public:
            // Drains the queue; every dequeued element is owned by `sink` and is
            // destroyed when `sink` is reset by the next Dequeue or goes out of scope.
            ~TDumperLockFreeQueue() {
                typename T::TRef sink;
                while (this->Dequeue(&sink)) {
                }
            }

            // Pops one element into *t, transferring ownership to the holder.
            // Returns false (leaving *t untouched) when the queue is empty.
            inline bool Dequeue(typename T::TRef* t) {
                T* raw = nullptr;

                if (!TRawQueue::Dequeue(&raw)) {
                    return false;
                }

                t->Reset(raw);
                return true;
            }

            // Pushes every pointer stored in `data` and then makes `data` forget
            // them, so ownership moves to the queue.
            inline void EnqueueAll(TDumperHolderVector<T>& data) {
                TRawQueue::EnqueueAll(data);
                data.Release();
            }
        };
    }

    // Report pooling is disabled for now: it consumes too much CPU in the aggregator.
    // TODO: either remove it completely or bring it back.
    // template <class T>
    // class TBatchRecord: public TAvlTreeItem<TBatchRecord<T>, TCompareUsingLess>,
    //                     public TSafeObjectFromPool<TBatchRecord<T>> {
    /**
     * A single payload object of type T together with its generation time and a
     * retry deadline. Instances are intrusive AVL tree items ordered by deadline,
     * and are created only through Make().
     */
    template <class T>
    class TBatchRecord: public TAvlTreeItem<TBatchRecord<T>, TCompareUsingLess> {
    public:
        using TType = TBatchRecord<T>;
        using TTree = TAvlTree<TType, TCompareUsingLess>;

        using TRef = THolder<TType>;
        using TRefVector = TDumperHolderVector<TType>;
        using TRecordQueue = TDumperLockFreeQueue<TType>;

        // using TPool = typename TSafeObjectFromPool<TType>::TPool;

        /**
         * Creates a heap-allocated record, forwarding the arguments to one of the
         * private constructors, and bumps MemPoolStatsBatchCount.
         */
        template <typename... Args>
        static inline TRef Make(Args&&... args) {
            // return new (Singleton<TPool>()) TType(std::forward<Args>(args)...);
            TRef record(new TType(std::forward<Args>(args)...));
            // Count the record only once it really exists: if allocation or the
            // constructor throws, the destructor never runs and an early increment
            // would never be balanced.
            AtomicIncrement(MemPoolStatsBatchCount);
            return record;
        }

        inline const T& GetObject() const {
            return Object;
        }
        // Hash of the payload computed with THash<T>.
        inline size_t GetHash() const {
            return Default<THash<T>>()(Object);
        }

        inline const TInstant& GetGenerated() const {
            return Generated;
        }
        const TInstant& GetDeadline() const {
            return Deadline;
        }
        // The deadline takes part in operator<, so it must not be changed while the
        // record is linked into a tree.
        void SetDeadline(const TInstant& deadline) {
            Deadline = deadline;
        }

        // True when the record was generated more than RECORD_TTL before `now`.
        bool IsExpired(const TInstant& now) const {
            return Generated < now - RECORD_TTL;
        }

        // Orders records by deadline; records with equal deadlines are ordered by
        // address so that distinct records never compare equivalent.
        inline bool operator<(const TType& rhs) const {
            // std::less yields a strict total order over pointers, whereas the
            // built-in `<` on pointers to unrelated objects is unspecified.
            return Deadline < rhs.Deadline ||
                   (Deadline == rhs.Deadline && std::less<const TType*>()(this, &rhs));
        }

        ~TBatchRecord() {
            AtomicDecrement(MemPoolStatsBatchCount);
        }

    private:
        explicit TBatchRecord(const T& object, TInstant generated)
            : Object(object)
            , Generated(generated)
        {
        }

        explicit TBatchRecord(T&& object, TInstant generated)
            : Object(std::move(object))
            , Generated(generated)
        {
        }

        T Object;
        TInstant Generated;
        TInstant Deadline;
    };

    template <class T>
    class TBatch: public TNonCopyable {
    public:
        using TType = TBatch<T>;
        using TRef = THolder<TType>;

        using TRecord = TBatchRecord<T>;

        TBatch(std::size_t shardIndex, std::size_t batchSize)
            : ShardIndex(shardIndex)
            , BatchSize(batchSize)
            , Deadline(TInstant::Now() + BATCH_TTL)
        {
        }

        virtual ~TBatch() = default;

        bool IsReady() const {
            if (Empty()) {
                return false;
            } else {
                return Records.size() >= BatchSize || Deadline <= TInstant::Now();
            }
        }

        void Enqueue(typename TRecord::TRef record) {
            Records.PushBack(std::move(record));
        }

        void DequeueAll(typename TRecord::TTree& tree) {
            ui64 outdated = 0;
            const auto now(TInstant::Now());
            const TInstant deadline(now + RETRY_TIMEOUT);
            for (const auto idx : xrange(Records.size())) {
                if (Records[idx]->IsExpired(now)) {
                    outdated++;
                } else {
                    Records[idx]->SetDeadline(deadline);
                    tree.Insert(Records.Release(idx));
                }
            }

            TUnistat::Instance().PushSignalUnsafe(ELibrarySignals::DumperOutdatedRecords, outdated);
        }

        inline const typename TRecord::TRefVector& GetRecords() const {
            return Records;
        }
        bool Empty() const {
            return Records.empty();
        }
        std::size_t GetShardIndex() const {
            return ShardIndex;
        }
        TDuration GetTimer() const {
            return Timer.Get();
        }
        void ResetTimer() {
            Timer.Reset();
        }

        virtual TThreadPool::TFuture Dispatch() = 0;

    private:
        const std::size_t ShardIndex;
        const std::size_t BatchSize;
        typename TRecord::TRefVector Records;
        TSimpleTimer Timer;
        TInstant Deadline;
    };

    /**
     * Worker thread that moves records from a shared lock-free queue into
     * per-shard batches and dispatches every batch that becomes ready.
     *
     * Records of a batch whose dispatch failed are parked in DelayedRecords and
     * re-enqueued once their retry deadline passes. Per-shard error statistics
     * are kept in ShardStates and exposed through IsReady().
     *
     * T is the concrete batch type; derived classes provide the shard count,
     * the batch size and the batch factory.
     */
    template <class T>
    class TRecordDumper: public INamedThread {
    public:
        using TType = TRecordDumper<T>;
        using TThreadGuard = TThreadGuard<TType>;

        using TBatch = T;
        using TRecord = typename T::TRecord;

        // The queue is held by reference and must outlive the dumper.
        TRecordDumper(typename TRecord::TRecordQueue& queue_)
            : INamedThread("RecordDumper")
            , Queue(queue_)
            , ShouldStop(false)
        {
        }

        virtual ~TRecordDumper() = default;

        // Thread body: creates one batch per shard, then processes records until
        // Stop() is requested, and finally waits for in-flight batch callbacks.
        void* ThreadProc() noexcept override {
            ShardStates.Own()->resize(GetShardCount());

            for (const auto& shardIndex : xrange(GetShardCount())) {
                Batches.emplace_back(CreateBatch(shardIndex));
            }

            while (!ShouldStop.load()) {
                if (!ProcessRecords()) {
                    // Nothing was dispatched: sleep briefly (or until Stop() signals).
                    Event.WaitT(TDuration::MilliSeconds(100));
                }
            }

            BatchesFence.Wait();

            return this;
        }

        // Asks the thread to finish, joins it and destroys all delayed records.
        void Stop() {
            ShouldStop.store(true);
            Event.Signal();
            Join();

            // cleanup
            auto records(DelayedRecords.Own());
            while (!records->Empty()) {
                // Taking ownership destroys the record at the end of the iteration.
                // NOTE(review): relies on the tree item unlinking itself on
                // destruction - confirm against TAvlTreeItem.
                typename TRecord::TRef probeRef(&(*records->First()));
            }
        }

        // Returns false while at least one shard has more than ERROR_LIMIT
        // failures in a row with the latest one within RELAXATION_INTERVAL.
        bool IsReady() const {
            auto shardStates(ShardStates.Own());
            const auto since(TInstant::Now() - RELAXATION_INTERVAL);
            for (const auto& state : *shardStates) {
                if (state.ErrorCount > ERROR_LIMIT && since < state.ErrorTime) {
                    return false;
                }
            }
            return true;
        }

        virtual std::size_t GetShardCount() = 0;
        virtual std::size_t GetBatchSize() = 0;
        virtual typename TBatch::TRef CreateBatch(std::size_t shardIndex) = 0;

    private:
        // ErrorCount is the number of failed dispatches since the last success;
        // ErrorTime is the time of the latest failure (zero after a success).
        struct TShardState {
            ui64 ErrorCount;
            TInstant ErrorTime;
        };

        // One processing round: re-enqueue due delayed records, drain the input
        // queue, dispatch ready batches. Returns true if any batch was dispatched.
        bool ProcessRecords() {
            DispatchExpiredRecords();
            DequeueRecords();

            bool processed = false;
            for (auto& batch : Batches) {
                if (batch->IsReady()) {
                    const auto shardIndex(batch->GetShardIndex());

                    // Dispatch() has to run while `batch` still owns the object.
                    // Written as one expression,
                    //   batch->Dispatch().Subscribe([batch_ = batch.Release(), ...])
                    // that order is only guaranteed starting with C++17, so the
                    // steps are sequenced explicitly.
                    auto future(batch->Dispatch());

                    /* The fence guard is created here, in the dumper thread, and
                       not inside the callback, so that the thread sync takes
                       effect. TGuard is not copyable, hence the TIntrusivePtr
                       wrapper. */
                    auto guard(MakeIntrusive<TOwningGuard>(BatchesFence));

                    // The batch travels into the callback as a raw pointer and is
                    // re-wrapped into a holder there.
                    // NOTE(review): if the callback is never invoked the batch
                    // leaks - confirm Subscribe() always calls it.
                    future.Subscribe([batch_ = batch.Release(), this, guard = std::move(guard)] (const TThreadPool::TFuture& future_) {
                        typename TBatch::TRef batch(batch_);
                        auto ownedState(ShardStates.Own());
                        auto& shardState = ownedState->at(batch->GetShardIndex());

                        try {
                            // Rethrows the dispatch error, if any.
                            future_.GetValue();
                        } catch (...) {
                            // Park the still-valid records for a later retry.
                            auto records(DelayedRecords.Own());
                            batch->DequeueAll(*records);

                            shardState.ErrorCount++;
                            shardState.ErrorTime = TInstant::Now();

                            ERROR_LOG << "Unable to insert probes into shard " << batch->GetShardIndex()
                                    << " (took " << batch->GetTimer() << "): " << CurrentExceptionMessage()
                                    << Endl;

                            return;
                        }

                        shardState.ErrorCount = 0;
                        shardState.ErrorTime = TInstant::Zero();

                        TUnistat::Instance().PushSignalUnsafe(
                            ELibrarySignals::DumperInsertionTime, batch->GetTimer().MilliSeconds());

                        INFO_LOG << "Probe insertion into shard " << batch->GetShardIndex()
                                << " took " << batch->GetTimer() << Endl;
                    });

                    // Start accumulating a fresh batch for the same shard.
                    batch.Reset(CreateBatch(shardIndex));
                    processed = true;
                }
            }

            return processed;
        }

        // Routes the record to a batch chosen by consistent hashing of its hash.
        void EnqueueRecord(typename TRecord::TRef record) {
            const auto idx = ConsistentHashing(record->GetHash(), Batches.size());
            Batches[idx]->Enqueue(std::move(record));
        }

        // Drains the input queue into batches, at most
        // Batches.size() * GetBatchSize() non-expired records per call.
        // Expired records are dropped and counted.
        void DequeueRecords() {
            ui64 processed = 0;
            ui64 outdated = 0;

            const auto now(TInstant::Now());
            typename TRecord::TRef record;
            while (processed < Batches.size() * GetBatchSize() && Queue.Dequeue(&record)) {
                if (record->IsExpired(now)) {
                    outdated++;
                } else {
                    EnqueueRecord(std::move(record));
                    processed++;
                }
            }

            auto& stats = TUnistat::Instance();
            stats.PushSignalUnsafe(ELibrarySignals::DumperProcessedRecords, processed);
            stats.PushSignalUnsafe(ELibrarySignals::DumperOutdatedRecords, outdated);
        }

        // Re-enqueues delayed records whose retry deadline has passed, in deadline
        // order, up to Batches.size() * GetBatchSize() records per call. Records
        // that expired in the meantime are destroyed and counted.
        void DispatchExpiredRecords() {
            ui64 outdated = 0;
            ui64 processed = 0;

            const auto now(TInstant::Now());

            auto records(DelayedRecords.Own());
            while (!records->Empty() && processed < Batches.size() * GetBatchSize()) {
                typename TRecord::TRef record(&(*records->First()));
                if (record->GetDeadline() > now) {
                    // Not due yet: give ownership back to the tree and stop, since
                    // the tree is ordered by deadline.
                    Y_UNUSED(record.Release());
                    break;
                } else {
                    record->Unlink();
                    if (record->IsExpired(now)) {
                        outdated++;
                    } else {
                        processed++;
                        EnqueueRecord(std::move(record));
                    }
                }
            }

            TUnistat::Instance().PushSignalUnsafe(ELibrarySignals::DumperOutdatedRecords, outdated);
        }

        typename TRecord::TRecordQueue& Queue;
        TVector<typename TBatch::TRef> Batches;
        TPlainLockedBox<typename TRecord::TTree> DelayedRecords;
        TPlainLockedBox<TVector<TShardState>> ShardStates;
        TOwningFence BatchesFence;

        // Written by Stop() and read by the worker thread, so it has to be atomic;
        // `volatile` gives no inter-thread guarantees.
        std::atomic<bool> ShouldStop;
        TAutoEvent Event;
    };
}
