#pragma once

#include <util/generic/hash.h>
#include <util/generic/hash_set.h>
#include <util/generic/vector.h>
#include <util/generic/yexception.h>
#include <util/system/mutex.h>

#include <memory>

namespace NSv {
    // Construct an object of type V by calling the provided function. For repeated calls with
    // the same key, return the old object so long as there is still at least one pointer
    // to it. Can only be used from one thread for each combination of types K, V, and F.
    //
    // The cache entry for a key is erased by the returned pointer's deleter when the last
    // owner releases the object. If constructing the object throws, the placeholder entry
    // that was inserted for the key is erased again before the exception propagates, so
    // failed constructions do not leave keys behind in the cache.
    //
    // NOTE(review): the function is declared `static`, so every translation unit that
    // includes this header instantiates its own copy with its own cache - confirm that
    // sharing across translation units is not expected.
    template <typename K, typename F, typename V = std::invoke_result_t<F>>
    static std::shared_ptr<V> StaticData(K&& k, F&& f) {
        // Function-local static: one map per instantiation; it stores non-owning references.
        static THashMap<std::decay_t<K>, std::weak_ptr<V>> st;
        auto it = st.emplace(std::forward<K>(k), std::weak_ptr<V>{}).first;
        auto ret = it->second.lock();
        if (!ret) {
            V* raw = nullptr;
            try {
                raw = new V(f());
            } catch (...) {
                // No shared_ptr exists for this entry, hence no deleter will ever erase it.
                // Only drop it while it is still unowned.
                if (it->second.expired()) {
                    st.erase(it->first);
                }
                throw;
            }
            // The deleter keeps a pointer to the key stored inside the map entry and uses it
            // to erase that entry before destroying the object. Should creating the
            // shared_ptr itself throw, the deleter is invoked on `raw`, which cleans up both.
            it->second = ret = {raw, [k = &it->first](V* v) noexcept {
                st.erase(*k);
                delete v;
            }};
        }
        return ret;
    }

    // Rooted thread-local storage. Unlike `pthread_key_t`, this can be destroyed by
    // the owning thread to collect all thread-local data at any point, not just at thread
    // termination. This is useful because 1. at thread termination, there are no more
    // coroutines running; 2. thread-local data may actually reference some coroutines,
    // *preventing* the thread from terminating until the coroutines are somehow cancelled.
    // (Also, the number of slots is configurable, not fixed at PTHREAD_KEYS_MAX, which is
    // 1024 on Linux.)
    class TThreadLocalRoot : TNonCopyable {
    private:
        struct TSlot {
            void* P_{nullptr}; // actually T*
            void* D_{nullptr}; // actually TValue<T>*
            void (*F_)(void* d) = nullptr;
        };

    public:
        template <typename T>
        class TValue {
        public:
            TValue() {
                with_lock (FreeM_) {
                    Y_ENSURE(Free_, "out of free slots");
                    I_ = Free_.back();
                    Free_.pop_back();
                }
            }

            ~TValue() {
                for (auto slot : R_) {
                    delete reinterpret_cast<T*>(slot->P_);
                    slot->P_ = nullptr;
                }
                with_lock (FreeM_) {
                    Free_.push_back(I_);
                }
            }

            T* Get() {
                Y_ENSURE(Local_, "no thread-local root");
                return reinterpret_cast<T*>(Local_[I_].P_);
            }

            T* Reset(T* ptr = nullptr) {
                Y_ENSURE(Local_, "no thread-local root");
                auto& slot = Local_[I_];
                if (slot.P_ && !ptr) {
                    with_lock (M_) {
                        R_.erase(&slot);
                    }
                }
                if (!slot.P_ && ptr) {
                    with_lock (M_) {
                        R_.insert(&slot);
                    }
                }
                delete reinterpret_cast<T*>(slot.P_);
                slot = {ptr, this, [](void* d) {
                    reinterpret_cast<TValue*>(d)->Reset(nullptr);
                }};
                return ptr;
            }

            const T* Get() const {
                return const_cast<TValue*>(this)->Get();
            }

            template <typename... Args>
            T& GetOrCreate(Args&&... args) {
                auto p = Get();
                return p ? *p : *Reset(new T(std::forward<Args>(args)...));
            }

            T& operator*() {
                return *Get();
            }

            const T& operator*() const {
                return *Get();
            }

            T* operator->() {
                return Get();
            }

            const T* operator->() const {
                return Get();
            }

        private:
            size_t I_;
            TMutex M_;
            THashSet<TSlot*> R_;
        };

    public:
        TThreadLocalRoot() {
            Y_ENSURE(!Local_, "already have a thread-local root");
            Local_ = Slots_.data();
        }

        ~TThreadLocalRoot() {
            for (auto& s : Slots_) {
                if (s.P_) {
                    s.F_(s.D_);
                }
            }
            Local_ = nullptr;
        }

    private:
        static constexpr const size_t SlotCount_ = 8192;
        static TMutex FreeM_;
        static TVector<size_t> Free_;
        static thread_local TSlot* Local_;
        TVector<TSlot> Slots_{SlotCount_};
    };

    // Shorthand for the handle type nested in `TThreadLocalRoot`, so that a thread-local
    // value of type T can be declared as `TThreadLocal<T>`.
    template <typename T>
    using TThreadLocal = TThreadLocalRoot::TValue<T>;

    // `Interned(m, a, b, ...)` yields whichever of the provided constant values equals `m`,
    // or an empty string when none of them does. This overload is the end of the candidate
    // list: nothing is left to compare against, so the result is empty.
    static inline TStringBuf Interned(TStringBuf) noexcept {
        return TStringBuf{};
    }

    // Checks `m` against the candidate `a` and then, in order, against the remaining ones.
    // The first candidate equal to `m` is returned, so the result refers to the candidate's
    // storage rather than to `m`'s.
    template <typename... Ts>
    static inline TStringBuf Interned(TStringBuf m, TStringBuf a, Ts... more) noexcept {
        if (m == a) {
            return a;
        }
        return Interned(m, more...);
    }
}
