#include "yasm_aggregator.h"

#include <solomon/libs/cpp/sync/rw_lock.h>

#include <infra/yasm/common/points/hgram/ugram/compress/compress.h>

#include <library/cpp/containers/absl_flat_hash/flat_hash_map.h>
#include <library/cpp/monlib/metrics/log_histogram_collector.h>
#include <library/cpp/monlib/metrics/summary_snapshot.h>
#include <library/cpp/regex/pire/regexp.h>
#include <library/cpp/threading/atomic_shared_ptr/atomic_shared_ptr.h>

#include <mutex>
#include <cmath>
#include <limits>
#include <atomic>
#include <unordered_map>
#include <variant>

using NMonitoring::ISummaryDoubleSnapshotPtr;
using NMonitoring::TSummaryDoubleSnapshot;
using NMonitoring::ISummaryDoubleSnapshot;
using NMonitoring::TLogHistogramSnapshot;
using NMonitoring::TLogHistogramSnapshotPtr;
using NMonitoring::IHistogramSnapshot;
using NMonitoring::IHistogramSnapshotPtr;
using NMonitoring::EMetricType;
using NMonitoring::TBucketBound;
using NMonitoring::TBucketBounds;
using NMonitoring::TBucketValues;
using NMonitoring::TBucketValue;

namespace NSolomon::NIngestor {

// Lock-free collector that keeps the largest collected value together with
// the number of Collect() calls.
class TMaxCollector {
public:
    TMaxCollector()
        : Value_(std::numeric_limits<double>::lowest())
        , Count_(0u)
    {
    }

    // Raises the stored maximum to `candidate` if it is larger and counts the call.
    // Always returns 0.
    i64 Collect(double candidate) noexcept {
        double seen = Value_.load(std::memory_order_relaxed);
        // A failed CAS refreshes `seen`, so the comparison is re-evaluated on every retry.
        while (!(seen >= candidate)) {
            const bool replaced = Value_.compare_exchange_weak(
                    seen,
                    candidate,
                    std::memory_order_release,
                    std::memory_order_consume);
            if (replaced) {
                break;
            }
        }
        Count_.fetch_add(1u, std::memory_order_relaxed);
        return 0;
    }

    // Number of Collect() calls so far.
    size_t Count() const noexcept {
        return Count_.load(std::memory_order_relaxed);
    }

    // Current maximum (numeric_limits<double>::lowest() if nothing was collected).
    double Snapshot() const noexcept {
        return Value_.load(std::memory_order_relaxed);
    }

    // Size of the collector object itself.
    size_t SizeBytes() const {
        return sizeof(*this);
    }

private:
    std::atomic<double> Value_;
    std::atomic<size_t> Count_;
};

// Lock-free collector that keeps the smallest collected value together with
// the number of Collect() calls.
class TMinCollector {
public:
    TMinCollector()
        : Value_(std::numeric_limits<double>::max())
        , Count_(0u)
    {
    }

    // Lowers the stored minimum to `candidate` if it is smaller and counts the call.
    // Always returns 0.
    i64 Collect(double candidate) noexcept {
        double seen = Value_.load(std::memory_order_relaxed);
        // A failed CAS refreshes `seen`, so the comparison is re-evaluated on every retry.
        while (!(seen <= candidate)) {
            const bool replaced = Value_.compare_exchange_weak(
                    seen,
                    candidate,
                    std::memory_order_release,
                    std::memory_order_consume);
            if (replaced) {
                break;
            }
        }
        Count_.fetch_add(1u, std::memory_order_relaxed);
        return 0;
    }

    // Number of Collect() calls so far.
    size_t Count() const noexcept {
        return Count_.load(std::memory_order_relaxed);
    }

    // Current minimum (numeric_limits<double>::max() if nothing was collected).
    double Snapshot() const noexcept {
        return Value_.load(std::memory_order_relaxed);
    }

    // Size of the collector object itself.
    size_t SizeBytes() const {
        return sizeof(*this);
    }

private:
    std::atomic<double> Value_;
    std::atomic<size_t> Count_;
};

// Lock-free collector that accumulates the sum of collected values together
// with the number of Collect() calls.
class TSumCollector {
public:
    TSumCollector()
        : Value_(0.0)
        , Count_(0u)
    {
    }

    // Adds `add` to the running sum and counts the call. Always returns 0.
    i64 Collect(double add) noexcept {
        double current = Value_.load(std::memory_order_relaxed);
        // On failure `current` is reloaded by the CAS and the new sum is recomputed.
        while (!Value_.compare_exchange_weak(
                current,
                current + add,
                std::memory_order_release,
                std::memory_order_consume))
        {
        }
        Count_.fetch_add(1u, std::memory_order_relaxed);
        return 0;
    }

    // Number of Collect() calls so far.
    size_t Count() const noexcept {
        return Count_.load(std::memory_order_relaxed);
    }

    // Current sum (0.0 if nothing was collected).
    double Snapshot() const noexcept {
        return Value_.load(std::memory_order_relaxed);
    }

    // Size of the collector object itself.
    size_t SizeBytes() const {
        return sizeof(*this);
    }

private:
    std::atomic<double> Value_;
    std::atomic<size_t> Count_;
};

// Lock-free collector for averages: keeps the sum of collected values and how
// many values were collected; the snapshot exposes both as a summary.
class TAvgCollector {
public:
    TAvgCollector()
        : Sum_(0)
        , Count_(0)
    {
    }

    // Adds `add` to the running sum and counts the call. Always returns 0.
    i64 Collect(double add) noexcept {
        double current = Sum_.load(std::memory_order_relaxed);
        // On failure `current` is reloaded by the CAS and the new sum is recomputed.
        while (!Sum_.compare_exchange_weak(
                current,
                current + add,
                std::memory_order_release,
                std::memory_order_consume))
        {
        }
        Count_.fetch_add(1u, std::memory_order_relaxed);
        return 0;
    }

    // Number of Collect() calls so far.
    size_t Count() const noexcept {
        return Count_.load(std::memory_order_relaxed);
    }

    // Summary carrying the sum and the count; min, max and last are set to 0.0.
    // The two fields are read by separate relaxed loads (sum first, then count).
    ISummaryDoubleSnapshotPtr Snapshot() const noexcept {
        const double total = Sum_.load(std::memory_order_relaxed);
        const size_t samples = Count_.load(std::memory_order_relaxed);
        return MakeIntrusive<TSummaryDoubleSnapshot>(total, 0.0, 0.0, 0.0, samples);
    }

    // Size of the collector object itself.
    size_t SizeBytes() const {
        return sizeof(*this);
    }

private:
    std::atomic<double> Sum_;
    std::atomic<size_t> Count_;
};

// Lock-free collector that remembers the most recently stored value together
// with the number of Collect() calls.
class TLastCollector {
public:
    TLastCollector()
        : Value_(0.0)
        , Count_(0)
    {
    }

    // Overwrites the stored value with `value` and counts the call. Always returns 0.
    i64 Collect(double value) noexcept {
        Value_.store(value, std::memory_order_relaxed);
        Count_.fetch_add(1u, std::memory_order_relaxed);
        return 0;
    }

    // Number of Collect() calls so far.
    size_t Count() const noexcept {
        return Count_.load(std::memory_order_relaxed);
    }

    // Value written by the latest store (0.0 if nothing was collected).
    double Snapshot() const noexcept {
        return Value_.load(std::memory_order_relaxed);
    }

    // Size of the collector object itself.
    size_t SizeBytes() const {
        return sizeof(*this);
    }

private:
    std::atomic<double> Value_;
    std::atomic<size_t> Count_;
};

class THistogramCollectors {
public:
    // Mutex-protected collector that merges explicit-bounds histograms into a
    // single histogram (Bounds_/Values_), redistributing bucket values
    // proportionally when the bucket bounds of the two sides differ.
    class THistogramMergeCollector {
    public:
        // Merges `snapshot` into the accumulated histogram, compresses the
        // result with TUgramCompressor::Compress and counts the call.
        // Returns the change of AllocatedBytes() caused by this call (may be negative).
        i64 Collect(IHistogramSnapshot* snapshot) {
            std::lock_guard g(Mutex_);
            i64 bytes = AllocatedBytes();
            Merge(snapshot);
            TUgramCompressor::Compress(Bounds_, Values_);
            ++Count_;
            return (i64) AllocatedBytes() - bytes;
        }

        // Converts the log histogram with LogHist2Hist and merges it like an
        // explicit histogram.
        i64 Collect(TLogHistogramSnapshot* snapshot) {
            return Collect(LogHist2Hist(snapshot).Get());
        }

        // Builds an explicit histogram snapshot from the current bounds and values.
        IHistogramSnapshotPtr Snapshot() const {
            std::lock_guard g(Mutex_);
            return NMonitoring::ExplicitHistogramSnapshot(Bounds_, Values_);
        }

        // Number of Collect() calls so far.
        size_t Count() const noexcept {
            std::lock_guard g(Mutex_);
            return Count_;
        }

        // Size of the object itself; heap memory of the vectors is not included
        // here (its changes are reported through the return value of Collect()).
        size_t SizeBytes() const {
            return sizeof(*this);
        }

    private:
        TBucketBounds Bounds_;
        TBucketValues Values_;
        size_t Count_{0};
        mutable std::mutex Mutex_;

        // Replaces Bounds_/Values_ with the merge of the current histogram and `rhs`.
        // The caller must hold Mutex_.
        void Merge(IHistogramSnapshot* rhs) {
            TBucketBounds newBounds;
            TBucketValues newValues;
            newBounds.reserve(Bounds_.size() + rhs->Count());
            newValues.reserve(Bounds_.size() + rhs->Count());
            // lower bound used for the first bucket of either side
            TBucketBound MIN_VALUE = std::numeric_limits<TBucketBound>::lowest();
            // upper bound of the last bucket already written to the result
            auto prevUpper = MIN_VALUE;

            size_t lhsIdx = 0;
            size_t rhsIdx = 0;
            // Walk both bound sequences in parallel; every step emits the bucket
            // (prevUpper, nextUpper], where nextUpper is the smaller of the two
            // current upper bounds.
            while (lhsIdx < Bounds_.size() && rhsIdx < rhs->Count()) {
                auto leftUpper = Bounds_[lhsIdx];
                auto leftLower = lhsIdx == 0 ? MIN_VALUE : Bounds_[lhsIdx - 1];
                auto rightUpper = rhs->UpperBound(rhsIdx);
                auto rightLower = rhsIdx == 0 ? MIN_VALUE : rhs->UpperBound(rhsIdx - 1);
                auto nextUpper = std::min(leftUpper, rightUpper);

                double value = 0.;
                // Each side contributes its share only if its current bucket
                // fully covers the emitted interval.
                if (rightLower <= prevUpper && nextUpper <= rightUpper) {
                    value += CalculateValue(prevUpper, nextUpper, rightLower, rightUpper, rhs->Value(rhsIdx));
                }
                if (leftLower <= prevUpper && nextUpper <= leftUpper) {
                    value += CalculateValue(prevUpper, nextUpper, leftLower, leftUpper, Values_[lhsIdx]);
                }

                // Advance whichever side's bucket ends at nextUpper (both on a tie).
                if (leftUpper < rightUpper) {
                    ++lhsIdx;
                } else if (leftUpper > rightUpper) {
                    ++rhsIdx;
                } else {
                    ++lhsIdx;
                    ++rhsIdx;
                }

                newBounds.emplace_back(nextUpper);
                newValues.emplace_back(round(value));
                prevUpper = nextUpper;
            }

            // Remaining buckets of the accumulated histogram; the first of them
            // may be only partially left (its lower part was already emitted).
            while (lhsIdx < Bounds_.size()) {
                auto upper = Bounds_[lhsIdx];
                auto lower = lhsIdx == 0 ? MIN_VALUE : Bounds_[lhsIdx - 1];
                newBounds.emplace_back(upper);
                newValues.emplace_back(round(CalculateValue(prevUpper, upper, lower, upper, Values_[lhsIdx])));
                prevUpper = upper;
                ++lhsIdx;
            }

            // Remaining buckets of the incoming histogram, handled the same way.
            while (rhsIdx < rhs->Count()) {
                auto upper = rhs->UpperBound(rhsIdx);
                auto lower = rhsIdx == 0 ? MIN_VALUE : rhs->UpperBound(rhsIdx - 1);
                newBounds.emplace_back(upper);
                newValues.emplace_back(round(CalculateValue(prevUpper, upper, lower, upper, rhs->Value(rhsIdx))));
                prevUpper = upper;
                ++rhsIdx;
            }

            Bounds_ = std::move(newBounds);
            Values_ = std::move(newValues);
        }

        // Heap bytes reserved by the two vectors (based on capacity, not size).
        ui64 AllocatedBytes() const {
            return sizeof(TBucketBound) * Bounds_.capacity() + sizeof(TBucketValue) * Values_.capacity();
        }

        // Share of `value` that falls into (newLower, newUpper] when `value`
        // belongs to the bucket (oldLower, oldUpper]: the value is scaled by the
        // ratio of the two interval widths. The new interval is expected to lie
        // inside the old one (checked in debug builds only).
        static double CalculateValue(
                TBucketBound newLower,
                TBucketBound newUpper,
                TBucketBound oldLower,
                TBucketBound oldUpper,
                TBucketValue value)
        {
            Y_VERIFY_DEBUG(newLower < newUpper);
            Y_VERIFY_DEBUG(newLower >= oldLower && newUpper <= oldUpper);
            Y_VERIFY_DEBUG(oldLower < oldUpper);
            // skip the division entirely for empty buckets
            if (value == 0) {
                return 0.;
            }
            double res = value * ((newUpper - newLower) / (oldUpper - oldLower));
            return res;
        }
    };

    // Thin wrapper around NMonitoring::TLogHistogramCollector that additionally
    // counts how many snapshots were collected.
    class TLogHistogramCollector {
    public:
        // `startPower` is forwarded to the wrapped collector.
        // Count_ is initialised explicitly: before C++20 a default-constructed
        // std::atomic holds an indeterminate value.
        TLogHistogramCollector(int startPower = NMonitoring::TLogHistogramCollector::DEFAULT_START_POWER)
            : Count_(0u)
            , Impl_(startPower)
        {
        }

        // Counts the call and forwards the snapshot to the wrapped collector.
        // Always returns 0.
        i64 Collect(TLogHistogramSnapshot* lh) {
            Count_.fetch_add(1u, std::memory_order_relaxed);
            Impl_.Collect(lh);
            return 0;
        }

        // Number of Collect() calls so far.
        size_t Count() const noexcept {
            return Count_.load(std::memory_order_relaxed);
        }

        // Snapshot of the wrapped collector.
        TLogHistogramSnapshotPtr Snapshot() const {
            return Impl_.Snapshot();
        }

        // Size of the object itself.
        size_t SizeBytes() const {
            size_t res = sizeof(*this);
            // TODO: @malovb not considered TVector<double> Buckets_;
            return res;
        }

    private:
        std::atomic<size_t> Count_;
        NMonitoring::TLogHistogramCollector Impl_;
    };

    // Holds exactly one snapshot: either an explicit histogram or a log histogram.
    class TCommonSnapshot {
    public:
        TCommonSnapshot(IHistogramSnapshotPtr snapshot)
            : Snapshot_(std::in_place_type<IHistogramSnapshotPtr>, std::move(snapshot))
        {
        }

        TCommonSnapshot(TLogHistogramSnapshotPtr snapshot)
            : Snapshot_(std::in_place_type<TLogHistogramSnapshotPtr>, std::move(snapshot))
        {
        }

        // Moves the explicit histogram out; std::get throws
        // std::bad_variant_access if a log histogram is stored instead.
        IHistogramSnapshotPtr ExtractHistogram() {
            auto& held = std::get<IHistogramSnapshotPtr>(Snapshot_);
            return std::move(held);
        }

        // Moves the log histogram out; std::get throws
        // std::bad_variant_access if an explicit histogram is stored instead.
        TLogHistogramSnapshotPtr ExtractLogHistogram() {
            auto& held = std::get<TLogHistogramSnapshotPtr>(Snapshot_);
            return std::move(held);
        }

        // Metric type matching the stored alternative (HIST or LOGHIST).
        EMetricType GetType() const {
            if (std::holds_alternative<IHistogramSnapshotPtr>(Snapshot_)) {
                return EMetricType::HIST;
            }
            if (std::holds_alternative<TLogHistogramSnapshotPtr>(Snapshot_)) {
                return EMetricType::LOGHIST;
            }
            Y_FAIL("TCommonSnapshot must be initialized");
        }

    private:
        std::variant<IHistogramSnapshotPtr, TLogHistogramSnapshotPtr> Snapshot_;
    };

    using TCommonSnapshotPtr = THolder<TCommonSnapshot>;

    // Sums histograms of either kind. The collector starts without any backing
    // storage and creates it on the first Collect():
    //  - a TLogHistogramCollector while only log histograms have been seen;
    //  - a THistogramMergeCollector once an explicit histogram arrives (an
    //    existing log-histogram collector is converted and merged into it).
    // NOTE(review): switching the backing collector is not atomic with respect
    // to concurrent Collect() calls — confirm that callers tolerate this.
    class TSumCollector {
    public:
        TSumCollector() noexcept {
            Count_.store(0u, std::memory_order_relaxed);
        }

        // Collects an explicit histogram. Returns the change of the collector's
        // memory footprint in bytes.
        i64 Collect(IHistogramSnapshot* snapshot) {
            Count_.fetch_add(1u, std::memory_order_relaxed);
            if (auto sizeDiffBytes = CollectHist(snapshot)) {
                return *sizeDiffBytes;
            }

            // no explicit-histogram collector yet: create one and retry
            i64 sizeDiffBytes = SetHistCollector();
            if (auto collectDiffBytes = CollectHist(snapshot)) {
                sizeDiffBytes += *collectDiffBytes;
            }
            return sizeDiffBytes;
        }

        // Collects a log histogram. Returns the change of the collector's
        // memory footprint in bytes.
        i64 Collect(TLogHistogramSnapshot* snapshot) {
            Count_.fetch_add(1u, std::memory_order_relaxed);
            if (auto sizeDiffBytes = CollectLogHist(snapshot)) {
                return *sizeDiffBytes;
            }

            // no collector of any kind yet: create a log-histogram one and retry
            i64 sizeDiffBytes = SetLogHistCollector();
            if (auto collectDiffBytes = CollectLogHist(snapshot)) {
                sizeDiffBytes += *collectDiffBytes;
            }
            return sizeDiffBytes;
        }

        // Number of Collect() calls so far.
        size_t Count() const {
            return Count_.load(std::memory_order_relaxed);
        }

        // Snapshot of whichever backing collector exists (the explicit one is
        // checked first); an empty explicit histogram if nothing was collected.
        TCommonSnapshotPtr Snapshot() const {
            if (auto collector = HistCollector_) {
                return MakeHolder<TCommonSnapshot>(collector->Snapshot());
            }
            if (auto collector = LogHistCollector_) {
                return MakeHolder<TCommonSnapshot>(collector->Snapshot());
            }
            // some default value
            return MakeHolder<TCommonSnapshot>(NMonitoring::ExplicitHistogramSnapshot({}, {}));
        }

        // Size of this object plus SizeBytes() of the backing collectors.
        size_t SizeBytes() const {
            size_t res = sizeof(*this);
            // Sanity limit of 16 GiB. The constant is unsigned long long so the
            // shift is well defined even where `long` is 32 bits wide.
            if (auto collector = HistCollector_) {
                res += collector->SizeBytes();
                Y_VERIFY(res < (1ull << 34));
            }
            if (auto collector = LogHistCollector_) {
                res += collector->SizeBytes();
                Y_VERIFY(res < (1ull << 34));
            }
            return res;
        }

    private:
        TTrueAtomicSharedPtr<THistogramMergeCollector> HistCollector_;
        TTrueAtomicSharedPtr<TLogHistogramCollector> LogHistCollector_;
        std::atomic<size_t> Count_;

        // Feeds the explicit-histogram collector; nullopt if it does not exist yet.
        std::optional<i64> CollectHist(IHistogramSnapshot* snapshot) {
            if (auto collector = HistCollector_) {
                return collector->Collect(snapshot);
            }
            return std::nullopt;
        }

        // Feeds the log-histogram collector if present, otherwise the
        // explicit-histogram collector; nullopt if neither exists yet.
        std::optional<i64> CollectLogHist(TLogHistogramSnapshot* snapshot) {
            if (auto collector = LogHistCollector_) {
                return collector->Collect(snapshot);
            }
            if (auto collector = HistCollector_) {
                return collector->Collect(snapshot);
            }
            return std::nullopt;
        }

        // Installs a fresh explicit-histogram collector. A previously installed
        // log-histogram collector is detached and its data is merged into the
        // new collector. Returns the resulting footprint change in bytes.
        i64 SetHistCollector() {
            TTrueAtomicSharedPtr<THistogramMergeCollector> histCollector{new THistogramMergeCollector};
            i64 sizeDiffBytes = histCollector->SizeBytes();

            // take the log-histogram collector out, leaving an empty pointer behind
            TTrueAtomicSharedPtr<TLogHistogramCollector> logHistCollector;
            LogHistCollector_.swap(logHistCollector);

            if (logHistCollector) {
                sizeDiffBytes +=
                        histCollector->Collect(LogHist2Hist(logHistCollector->Snapshot().Get()).Get()) -
                        static_cast<i64>(logHistCollector->SizeBytes());
            }

            HistCollector_ = histCollector;
            return sizeDiffBytes;
        }

        // Installs a fresh log-histogram collector and returns its size in bytes.
        i64 SetLogHistCollector() {
            TTrueAtomicSharedPtr<TLogHistogramCollector> logHistCollector{new TLogHistogramCollector};
            LogHistCollector_ = logHistCollector;
            return logHistCollector->SizeBytes();
        }
    };

    class TUgramCompressor {
    public:
        static void Compress(TBucketBounds& bounds, TBucketValues& values) {
            if (bounds.size() < NMonitoring::HISTOGRAM_MAX_BUCKETS_COUNT) {
                return;
            }

            DeleteZeroBuckets(bounds, values);
            if (bounds.size() < NMonitoring::HISTOGRAM_MAX_BUCKETS_COUNT) {
                return;
            }

            auto ugram = ToUgramBuckets(bounds, values);
            auto compressed = NZoom::NHgram::TUgramCompressor::GetInstance().Compress(
                    ugram,
                    NMonitoring::HISTOGRAM_MAX_BUCKETS_COUNT - 2
            );

            auto [compressedBounds, compressedValues] = Ugram2HistBounds(compressed);
            bounds = std::move(compressedBounds);
            values = std::move(compressedValues);
        }

    private:
        static void DeleteZeroBuckets(TBucketBounds& bounds, TBucketValues& values) {
            if (values.empty()) {
                return;
            }

            size_t nonZeroIndex = 0;
            size_t n = values.size() - 1;
            for (size_t i = 0; i < n; ++i) {
                while (i < n && values[i] == 0) {
                    ++i;
                }

                if (i >= n) {
                    break;
                }

                values[nonZeroIndex] = values[i];
                Y_VERIFY_DEBUG(values[nonZeroIndex] != 0);
                bounds[nonZeroIndex] = bounds[i];
                ++nonZeroIndex;
            }
            bounds[nonZeroIndex] = bounds[n];
            values[nonZeroIndex] = values[n];
            ++nonZeroIndex;

            values.resize(nonZeroIndex);
            bounds.resize(nonZeroIndex);
        }

        static void StupidCompress(TBucketBounds& bounds, TBucketValues& values) {
            if (bounds.size() > NMonitoring::HISTOGRAM_MAX_BUCKETS_COUNT) {
                auto n = NMonitoring::HISTOGRAM_MAX_BUCKETS_COUNT - 1;
                bounds[n] = bounds.back();
                values[n] = std::accumulate(values.begin() + n, values.end(), 0ull);

                bounds.resize(n + 1);
                values.resize(n + 1);
            }
        }
    };

    // Converts upper-bound histogram buckets into ugram buckets with explicit
    // [lower, upper] ranges. The lower bound of a bucket is the (possibly
    // collapsed) upper bound of its predecessor, starting from MIN_DOUBLE.
    // A bucket whose upper bound is the very next double after its lower bound
    // is collapsed into a point bucket (upper == lower). Zero-valued buckets
    // are skipped but still advance the lower bound.
    static NZoom::NHgram::TUgramBuckets ToUgramBuckets(const TBucketBounds& bounds, const TBucketValues& values) {
        NZoom::NHgram::TUgramBuckets result;
        if (bounds.empty()) {
            return result;
        }
        result.reserve(bounds.size());

        double prevUpper = MIN_DOUBLE;
        for (size_t idx = 0; idx != bounds.size(); ++idx) {
            const double lower = prevUpper;
            double upper = bounds[idx];
            if (upper == std::nextafter(lower, MAX_DOUBLE)) {
                upper = lower;
            }
            prevUpper = upper;

            if (values[idx] != 0) {
                result.emplace_back(lower, upper, values[idx]);
            }
        }

        return result;
    }

    // Converts ugram buckets back into parallel vectors of upper bounds and values.
    //  - An empty ugram yields a single zero-valued bucket bounded by HISTOGRAM_INF_BOUND.
    //  - A point bucket (upper == lower) gets the next representable double as
    //    its upper bound.
    //  - Weights are rounded to the nearest integer.
    //  - A trailing zero-valued HISTOGRAM_INF_BOUND bucket is appended unless
    //    the last bound already equals HISTOGRAM_INF_BOUND.
    static std::pair<TBucketBounds, TBucketValues> Ugram2HistBounds(const NZoom::NHgram::TUgramBuckets& ugram) {
        if (ugram.empty()) {
            return {TBucketBounds{NMonitoring::HISTOGRAM_INF_BOUND}, TBucketValues{0u}};
        }

        TBucketBounds bounds;
        TBucketValues values;
        // one extra slot for the possible trailing inf bucket
        bounds.reserve(ugram.size() + 1);
        values.reserve(ugram.size() + 1);

        for (const auto& bucket: ugram) {
            const auto lower = bucket.LowerBound;
            auto upper = bucket.UpperBound;
            if (upper == lower) {
                upper = std::nextafter(upper, MAX_DOUBLE);
            }

            bounds.emplace_back(upper);
            values.emplace_back(static_cast<TBucketValue>(round(bucket.Weight)));
        }

        if (bounds.back() != NMonitoring::HISTOGRAM_INF_BOUND) {
            bounds.emplace_back(NMonitoring::HISTOGRAM_INF_BOUND);
            values.emplace_back(0u);
        }

        // move the locals into the pair instead of copying both vectors
        return {std::move(bounds), std::move(values)};
    }

private:
    // Converts a log histogram snapshot into an explicit histogram snapshot:
    //  - the first bucket is bounded by Min<TBucketBound>() and holds ZerosCount();
    //  - then one bucket per log-histogram bucket, using its UpperBound()/Bucket();
    //  - finally an empty bucket bounded by HISTOGRAM_INF_BOUND.
    static IHistogramSnapshotPtr LogHist2Hist(TLogHistogramSnapshot* logHist) {
        TBucketBounds bounds;
        TBucketValues values;

        // the final size is known up front: zero bucket + log buckets + inf bucket
        const size_t bucketsCount = static_cast<size_t>(logHist->Count()) + 2;
        bounds.reserve(bucketsCount);
        values.reserve(bucketsCount);

        // add bucket with 0
        bounds.emplace_back(Min<TBucketBound>());
        values.emplace_back(logHist->ZerosCount());

        for (size_t i = 0; i < logHist->Count(); ++i) {
            bounds.emplace_back(logHist->UpperBound(i));
            values.emplace_back(logHist->Bucket(i));
        }

        // add inf bucket
        bounds.emplace_back(NMonitoring::HISTOGRAM_INF_BOUND);
        values.emplace_back(0);

        return NMonitoring::ExplicitHistogramSnapshot(std::move(bounds), std::move(values));
    }

    static constexpr double MAX_DOUBLE = std::numeric_limits<double>::max();
    static constexpr double MIN_DOUBLE = std::numeric_limits<double>::lowest();
};

class TSummaryCollectors {
public:
    // Lock-free aggregation of summaries: sums are added, min/max are combined,
    // `last` is overwritten, sample counts are added. Every field is updated by
    // an independent atomic operation, so a snapshot taken during a concurrent
    // Collect() may mix fields from before and after that call.
    class TSumCollector {
    public:
        TSumCollector() {
            Sum_.store(0, std::memory_order_relaxed);
            Min_.store(std::numeric_limits<double>::max(), std::memory_order_relaxed);
            Max_.store(std::numeric_limits<double>::lowest(), std::memory_order_relaxed);
            // Last_ is read by Snapshot() even if nothing was collected; before
            // C++20 a default-constructed std::atomic holds an indeterminate value.
            Last_.store(0.0, std::memory_order_relaxed);
            Count_.store(0, std::memory_order_relaxed);
            SummariesCount_.store(0, std::memory_order_relaxed);
        }

        // Folds `snapshot` into the aggregate. Always returns 0.
        i64 Collect(ISummaryDoubleSnapshot *snapshot) noexcept {
            UpdateSum(Sum_, snapshot->GetSum());
            UpdateMin(Min_, snapshot->GetMin());
            UpdateMax(Max_, snapshot->GetMax());
            Last_.store(snapshot->GetLast(), std::memory_order_relaxed);
            Count_.fetch_add(snapshot->GetCount(), std::memory_order_relaxed);
            SummariesCount_.fetch_add(1u, std::memory_order_relaxed);
            return 0;
        }

        // Number of collected summaries (not the total sample count).
        size_t Count() const noexcept {
            return SummariesCount_.load(std::memory_order_relaxed);
        }

        // Summary built from the current sum, min, max, last and sample count.
        ISummaryDoubleSnapshotPtr Snapshot() const noexcept {
            return MakeIntrusive<TSummaryDoubleSnapshot>(
                    Sum_.load(std::memory_order_relaxed),
                    Min_.load(std::memory_order_relaxed),
                    Max_.load(std::memory_order_relaxed),
                    Last_.load(std::memory_order_relaxed),
                    Count_.load(std::memory_order_relaxed));
        }

        // Size of the collector object itself.
        size_t SizeBytes() const {
            return sizeof(*this);
        }

    private:
        std::atomic<double> Sum_;
        std::atomic<double> Min_;
        std::atomic<double> Max_;
        std::atomic<double> Last_;
        // total number of samples across all collected summaries
        std::atomic_uint64_t Count_;
        // number of Collect() calls
        std::atomic<size_t> SummariesCount_;
    };

    // Keeps a copy of the most recently collected summary, guarded by a mutex.
    class TLastCollector {
    public:
        // Count_ is initialised explicitly: it is incremented and read later,
        // and a plain size_t member is otherwise left indeterminate.
        TLastCollector()
            : Summary_(0.0, 0.0, 0.0, 0.0, 0u)
            , Count_(0)
        {
        }

        // Replaces the stored summary with a copy of `snapshot` and counts the
        // call. Always returns 0.
        i64 Collect(ISummaryDoubleSnapshot* snapshot) noexcept {
            std::lock_guard g(Mutex_);
            Summary_ = TSummaryDoubleSnapshot(
                    snapshot->GetSum(),
                    snapshot->GetMin(),
                    snapshot->GetMax(),
                    snapshot->GetLast(),
                    snapshot->GetCount());
            ++Count_;
            return 0;
        }

        // Number of Collect() calls so far.
        size_t Count() const noexcept {
            std::lock_guard g(Mutex_);
            return Count_;
        }

        // Copy of the stored summary (all zeroes if nothing was collected).
        ISummaryDoubleSnapshotPtr Snapshot() const noexcept {
            std::lock_guard g(Mutex_);
            return MakeIntrusive<TSummaryDoubleSnapshot>(Summary_);
        }

        // Size of the collector object itself.
        size_t SizeBytes() const {
            return sizeof(*this);
        }

    private:
        TSummaryDoubleSnapshot Summary_;
        size_t Count_;

        // TODO: write collector without mutex
        mutable std::mutex Mutex_;
    };

private:
    // Atomically adds `add` to `sum` using a compare-and-swap retry loop.
    static void UpdateSum(std::atomic<double>& sum, double add) noexcept {
        double current = sum.load(std::memory_order_relaxed);
        // On failure the CAS reloads `current`, so the new total is recomputed
        // from the fresh value on the next attempt.
        while (!sum.compare_exchange_weak(
                current,
                current + add,
                std::memory_order_release,
                std::memory_order_consume))
        {
        }
    }

    // Atomically lowers `min` to `candidate` unless the stored value is
    // already less than or equal to it.
    static void UpdateMin(std::atomic<double>& min, double candidate) noexcept {
        double current = min.load(std::memory_order_relaxed);
        // A failed CAS refreshes `current`, so the comparison is repeated
        // against the latest stored value.
        while (!(current <= candidate)) {
            const bool replaced = min.compare_exchange_weak(
                    current,
                    candidate,
                    std::memory_order_release,
                    std::memory_order_consume);
            if (replaced) {
                break;
            }
        }
    }

    // Atomically raises `max` to `candidate` unless the stored value is
    // already greater than or equal to it.
    static void UpdateMax(std::atomic<double>& max, double candidate) noexcept {
        double current = max.load(std::memory_order_relaxed);
        // A failed CAS refreshes `current`, so the comparison is repeated
        // against the latest stored value.
        while (!(current >= candidate)) {
            const bool replaced = max.compare_exchange_weak(
                    current,
                    candidate,
                    std::memory_order_release,
                    std::memory_order_consume);
            if (replaced) {
                break;
            }
        }
    }
};

// Table of per-label collectors of one kind, split into SHARDS_COUNT shards
// (each guarded by its own read/write lock), plus an estimate of the memory
// used by the stored entries.
template <class TCollector>
class TAggrsTable {
public:
    static constexpr size_t SHARDS_COUNT = 32;

    struct TAggrsTableValue {
        THolder<TCollector> Collector;
        EMetricType Type;
    };

    using TCollectorPtr = THolder<TCollector>;
    using TSnapshotPtr = decltype(TCollectorPtr()->Snapshot());
    using TAggregate = TAggregate<TSnapshotPtr>;

public:
    class TShardedAggrsTable {
        using TTableShard = NSync::TLightRwLock<std::unordered_map<TLabels, TAggrsTableValue>>;
    public:
        // Finds the collector for `labels`, creating it if necessary.
        // Returns the collector and the number of bytes added to the table:
        // 0 when the entry already existed, otherwise the size of the new entry.
        std::pair<TCollector*, size_t> Emplace(TLabels labels, EMetricType metricType) {
            static_assert(IsPowerOf2(SHARDS_COUNT), "shard count mut be power of two");
            size_t index = std::hash<TLabels>()(labels) & (SHARDS_COUNT - 1);

            // fast path: look the entry up under the read lock only
            {
                auto table = TableShards_[index].Read();
                if (auto it = table->find(labels); it != table->end()) {
                    return {it->second.Collector.Get(), 0};
                }
            }

            // build the collector before taking the write lock
            TCollectorPtr collector = MakeHolder<TCollector>();
            size_t newSize = labels.SizeBytes() + sizeof(TAggrsTableValue) + collector->SizeBytes();

            auto table = TableShards_[index].Write();
            auto [it, inserted] = table->emplace(std::move(labels), TAggrsTableValue{std::move(collector), metricType});
            // Another writer may have inserted the same labels between the read
            // and the write lock; then nothing was added and no size is reported.
            return {it->second.Collector.Get(), inserted ? newSize : size_t{0}};
        }

        // Direct access to a shard by index.
        auto& operator[](size_t idx) noexcept {
            return TableShards_[idx];
        }

        // Number of shards.
        size_t Size() const {
            return TableShards_.size();
        }

    private:
        std::array<TTableShard, SHARDS_COUNT> TableShards_;
    };

    using TTable = TShardedAggrsTable;

public:
    TAggrsTable()
        : Table_{std::make_unique<TShardedAggrsTable>()}
    {
    }

    // Routes `value` to the collector registered for `labels` (creating it on
    // first use) and updates the size estimate with whatever the table and the
    // collector report.
    template <class T>
    void Collect(T value, TLabels labels, EMetricType metricType) {
        // `labels` is a by-value parameter that is not used afterwards, so it is moved
        auto [collector, newSize] = Table_->Emplace(std::move(labels), metricType);
        if (newSize) {
            SizeBytes_.fetch_add(newSize, std::memory_order_relaxed);
        }

        if (i64 sizeDiffBytes = collector->Collect(value)) {
            SizeBytes_.fetch_add(sizeDiffBytes, std::memory_order_relaxed);
        }
    }

    // Hands the current table over to the caller, installs an empty one and
    // resets the size estimate.
    // NOTE(review): Table_ itself is not protected here — presumably callers
    // serialize this against Collect(); confirm.
    std::unique_ptr<TShardedAggrsTable> ReleaseAggrsTable() {
        SizeBytes_.store(0, std::memory_order_relaxed);
        return std::exchange(Table_, std::make_unique<TShardedAggrsTable>());
    }

    // Estimated memory usage in bytes, clamped at zero.
    size_t SizeBytes() const {
        i64 sizeBytes = SizeBytes_.load(std::memory_order_relaxed);
        return std::max<i64>(sizeBytes, 0);
    }

private:
    std::unique_ptr<TShardedAggrsTable> Table_;
    std::atomic<i64> SizeBytes_{0};
};

// Accepted signal-name suffixes: either four characters — any character
// followed by three from [vehmntx] — or one of the words "summ", "hgram", "max".
static const NRegExp::TFsm VALID_SUFFIX(R"(.[vehmntx]{3}|summ|hgram|max)");

// Returns the value of the first label whose key equals the pool's
// SignalIntTag, or an empty string if there is no such label.
static TStringBuf GetSignalName(const TLabels& labels) {
    for (size_t pos = 0; pos < labels.size(); ++pos) {
        const bool isSignalLabel = labels[pos].first == labels.LabelPool()->SignalIntTag;
        if (!isSignalLabel) {
            continue;
        }
        return labels.Value(pos);
    }
    return TStringBuf{};
}

// Returns the right-hand part produced by RSplit('_') on the signal name,
// i.e. the text after the last underscore when one is present.
static TStringBuf GetSuffix(const TLabels& labels) {
    TStringBuf head;
    TStringBuf tail;
    GetSignalName(labels).RSplit('_', head, tail);
    return tail;
}

// True if `suffix` is accepted by VALID_SUFFIX.
static bool ValidateSuffix(TStringBuf suffix) {
    NRegExp::TMatcher matcher(VALID_SUFFIX);
    return matcher.Match(suffix).Final();
}

// Aggregation level. The numeric value is also used by CollectorType() as the
// index of the suffix character that selects the collector for that level.
enum class EYasmAggregationType {
    GROUP = 1,
    METAGROUP = 2,
    ROLLUP = 3
};

// Kind of collector selected by a signal suffix (see CollectorType()).
enum class EYasmCollectorType {
    LOG_HIST,
    MAX,
    MIN,
    SUM,
    LAST,
    AVG,
};

// Maps a signal suffix to the collector type for the given aggregation level.
// The words "summ", "hgram" and "max" select SUM, LOG_HIST and MAX for every
// level. Otherwise the character at index `type` (1, 2 or 3) decides:
//   h -> LOG_HIST, x -> MAX, n -> MIN, m -> SUM, t -> LAST, v -> AVG, e -> SUM.
// Throws TInvalidSuffixError if the suffix is too short to contain that
// character or the character is not one of the above.
static EYasmCollectorType CollectorType(TStringBuf suffix, EYasmAggregationType type) {
    if (suffix == TStringBuf("summ")) {
        return EYasmCollectorType::SUM;
    } else if (suffix == TStringBuf("hgram")) {
        return EYasmCollectorType::LOG_HIST;
    } else if (suffix == TStringBuf("max")) {
        return EYasmCollectorType::MAX;
    }

    // Guard the index: the suffix is not necessarily validated by the caller.
    const size_t pos = static_cast<size_t>(type);
    if (pos >= suffix.size()) {
        ythrow TInvalidSuffixError() << "invalid aggregation suffix";
    }

    switch (suffix[pos]) {
        case 'h':
            return EYasmCollectorType::LOG_HIST;
        case 'x':
            return EYasmCollectorType::MAX;
        case 'n':
            return EYasmCollectorType::MIN;
        case 'm':
            return EYasmCollectorType::SUM;
        case 't':
            return EYasmCollectorType::LAST;
        case 'v':
            return EYasmCollectorType::AVG;
        case 'e':
            return EYasmCollectorType::SUM;
        default:
            ythrow TInvalidSuffixError() << "invalid aggregation suffix";
    }
}

// Wraps a gauge value into a summary according to the ROLLUP collector type
// derived from the signal suffix. The gauge is placed into the matching field
// and all other fields are zero:
//   SUM  -> sum = gauge, count = 0
//   AVG  -> sum = gauge, count = 1
//   MIN  -> min = gauge
//   MAX  -> max = gauge
//   LAST -> last = gauge
// Throws if the suffix is invalid or the rollup type is a histogram.
ISummaryDoubleSnapshotPtr GaugeToSummary(const TLabels& labels, double gauge) {
    TStringBuf suffix = GetSuffix(labels);
    Y_ENSURE(ValidateSuffix(suffix), "invalid aggregation suffix, labels:" << labels.AsString());
    EYasmCollectorType rollUp = CollectorType(suffix, EYasmAggregationType::ROLLUP);

    switch (rollUp) {
        case EYasmCollectorType::SUM:
            return MakeIntrusive<TSummaryDoubleSnapshot>(gauge, 0.0, 0.0, 0.0, 0u);
        case EYasmCollectorType::AVG:
            return MakeIntrusive<TSummaryDoubleSnapshot>(gauge, 0.0, 0.0, 0.0, 1u);
        case EYasmCollectorType::MIN:
            return MakeIntrusive<TSummaryDoubleSnapshot>(0.0, gauge, 0.0, 0.0, 0u);
        case EYasmCollectorType::MAX:
            return MakeIntrusive<TSummaryDoubleSnapshot>(0.0, 0.0, gauge, 0.0, 0u);
        case EYasmCollectorType::LAST:
            return MakeIntrusive<TSummaryDoubleSnapshot>(0.0, 0.0, 0.0, gauge, 0u);
        case EYasmCollectorType::LOG_HIST:
            throw yexception() << "try to convert gauge to summary with hgram rollup, labels:" << labels.AsString();
    }

    // Every enumerator is handled above; this keeps control from falling off
    // the end of a non-void function if the enum ever holds another value.
    ythrow yexception() << "unexpected rollup collector type, labels:" << labels.AsString();
}

class TYasmAggregates: public IYasmAggregates {
public:
    TYasmAggregates(
            std::unique_ptr<TAggrsTable<TMaxCollector>::TTable> maxTable,
            std::unique_ptr<TAggrsTable<TMinCollector>::TTable> minTable,
            std::unique_ptr<TAggrsTable<TSumCollector>::TTable> sumTable,
            std::unique_ptr<TAggrsTable<TAvgCollector>::TTable> avgTable,
            std::unique_ptr<TAggrsTable<TLastCollector>::TTable> lastTable,
            std::unique_ptr<TAggrsTable<THistogramCollectors::TSumCollector>::TTable> histsTable,
            std::unique_ptr<TAggrsTable<TSummaryCollectors::TSumCollector>::TTable> summarySumTable,
            std::unique_ptr<TAggrsTable<TSummaryCollectors::TLastCollector>::TTable> summaryLastTable)
            : MaxTable_(std::move(maxTable))
            , MinTable_(std::move(minTable))
            , SumTable_(std::move(sumTable))
            , AvgTable_(std::move(avgTable))
            , LastTable_(std::move(lastTable))
            , HistsTable_(std::move(histsTable))
            , SummarySumTable_(std::move(summarySumTable))
            , SummaryLastTable_(std::move(summaryLastTable))
    {
    }

    void Consume(IYasmAggregatesProcessorConsumer& consumer) override {
        ConsumeGaugeAggregates(consumer, *MaxTable_);
        ConsumeGaugeAggregates(consumer, *MinTable_);
        ConsumeGaugeAggregates(consumer, *SumTable_);
        ConsumeSummaryAggregates(consumer, *AvgTable_);
        ConsumeGaugeAggregates(consumer, *LastTable_);
        ConsumeSummaryAggregates(consumer, *SummarySumTable_);
        ConsumeSummaryAggregates(consumer, *SummaryLastTable_);
        ConsumeHistTypeAggregates(consumer, *HistsTable_);
    }

private:
    std::unique_ptr<TAggrsTable<TMaxCollector>::TTable> MaxTable_;
    std::unique_ptr<TAggrsTable<TMinCollector>::TTable> MinTable_;
    std::unique_ptr<TAggrsTable<TSumCollector>::TTable> SumTable_;
    std::unique_ptr<TAggrsTable<TAvgCollector>::TTable> AvgTable_;
    std::unique_ptr<TAggrsTable<TLastCollector>::TTable> LastTable_;
    std::unique_ptr<TAggrsTable<THistogramCollectors::TSumCollector>::TTable> HistsTable_;
    std::unique_ptr<TAggrsTable<TSummaryCollectors::TSumCollector>::TTable> SummarySumTable_;
    std::unique_ptr<TAggrsTable<TSummaryCollectors::TLastCollector>::TTable> SummaryLastTable_;

    template <class TSummaryTable>
    static void ConsumeSummaryAggregates(IYasmAggregatesProcessorConsumer& consumer, TSummaryTable& table) {
        ConsumeTable(table, [&consumer](auto key, EMetricType type, auto collector) {
            consumer.OnSummary(TAggregate(std::move(key), collector->Snapshot(), type, collector->Count()));
        });
    }

    template <class TGaugeTable>
    static void ConsumeGaugeAggregates(IYasmAggregatesProcessorConsumer& consumer, TGaugeTable& table) {
        ConsumeTable(table, [&consumer](auto key, EMetricType type, auto collector) {
            ISummaryDoubleSnapshotPtr summary = GaugeToSummary(key, collector->Snapshot());
            consumer.OnSummary(TAggregate(std::move(key), std::move(summary), type, collector->Count()));
        });
    }

    template <class THistTable>
    static void ConsumeHistTypeAggregates(IYasmAggregatesProcessorConsumer& consumer, THistTable& table) {
        ConsumeTable(table, [&consumer](auto key, EMetricType type, auto collector) {
            auto snapshotPtr = collector->Snapshot();
            if (type == EMetricType::HIST) {
                consumer.OnHistogram(TAggregate(
                        std::move(key),
                        snapshotPtr->ExtractHistogram(),
                        EMetricType::HIST,
                        collector->Count()));
            } else {
                consumer.OnLogHistogram(TAggregate(
                        std::move(key),
                        snapshotPtr->ExtractLogHistogram(),
                        EMetricType::LOGHIST,
                        collector->Count()));
            }
        });
    }

    template <typename TTable, typename TConsumer>
    static void ConsumeTable(TTable& table, TConsumer&& consumer) {
        for (size_t i = 0; i < table.Size(); i++) {
            auto tableShard = table[i].Write();
            for (auto it = tableShard->begin(), end = tableShard->end(); it != end;) {
                auto node = tableShard->extract(it++);
                consumer(std::move(node.key()), node.mapped().Type, std::move(node.mapped().Collector));
            }
        }
    }
};

/**
 * Implementation behind TYasmAggrState.
 *
 * Keeps one aggregation table per collector kind plus the metric transformer, and
 * routes every collected value to the table selected by the suffix of its labels
 * and by the type of the value (double, summary, histogram or log-histogram).
 */
class TYasmAggrState::TImpl {
public:
    /// Builds the transformer from the given aggregation rules; tables start default-constructed.
    TImpl(TVector<TYasmAggrRule> rules)
        : Transformer_(std::move(rules))
    {
    }

    /// Returns a non-owning pointer to the transformer owned by this object.
    TYasmMetricTransformer* GetTransformer() {
        return &Transformer_;
    }

    /**
     * Collects `value` under `labels`, choosing the collector from the GROUP
     * position of the labels' suffix.
     *
     * @throws TInvalidSuffixError    if the suffix does not pass validation
     * @throws TInvalidValueTypeError if the collector type does not fit the value type
     */
    template <class T>
    void Collect(T value, TLabels labels, EMetricType metricType) {
        TStringBuf suffix = GetSuffix(labels);
        if (!ValidateSuffix(suffix)) {
            ythrow TInvalidSuffixError();
        }

        // Resolve the collector type before labels are moved away: the suffix is
        // derived from labels, and the evaluation order of function arguments is
        // unspecified, so it must not be read in the same call that moves labels.
        const EYasmCollectorType collectorType = CollectorType(suffix, EYasmAggregationType::GROUP);
        Collect(value, std::move(labels), metricType, collectorType);
    }

    /**
     * Runs the transformer over `labels` and collects `value` once for every
     * label set it produces.
     */
    template <class T>
    void CollectTransform(T value, const TLabels& labels, EMetricType metricType) {
        auto aggLabelsSet = Transformer_.Transform(labels);
        auto it = aggLabelsSet.begin();
        while (it != aggLabelsSet.end()) {
            // Extract the node so the labels can be moved out of the set.
            auto node = aggLabelsSet.extract(it++);
            Collect<T>(value, std::move(node.value()), metricType);
        }
    }

    /// Releases the contents of every table into a new TYasmAggregates object.
    IYasmAggregatesPtr ReleaseAggregates() {
        return MakeHolder<TYasmAggregates>(
                MaxTable_.ReleaseAggrsTable(),
                MinTable_.ReleaseAggrsTable(),
                SumTable_.ReleaseAggrsTable(),
                AvgTable_.ReleaseAggrsTable(),
                LastTable_.ReleaseAggrsTable(),
                HistsTable_.ReleaseAggrsTable(),
                SummarySumTable_.ReleaseAggrsTable(),
                SummaryLastTable_.ReleaseAggrsTable());
    }

    /// Sum of the sizes reported by all tables and the transformer; aborts if it reaches 2^34 bytes.
    size_t SizeBytes() const {
        size_t res = MaxTable_.SizeBytes();
        res += MinTable_.SizeBytes();
        res += SumTable_.SizeBytes();
        res += AvgTable_.SizeBytes();
        res += LastTable_.SizeBytes();
        res += HistsTable_.SizeBytes();
        res += SummarySumTable_.SizeBytes();
        res += SummaryLastTable_.SizeBytes();
        res += Transformer_.SizeBytes();
        // Unsigned 64-bit literal: shifting a `long` by 34 is undefined where long is 32 bits wide.
        Y_VERIFY(res < (1ull << 34));
        return res;
    }

private:
    TAggrsTable<TMaxCollector> MaxTable_;
    TAggrsTable<TMinCollector> MinTable_;
    TAggrsTable<TSumCollector> SumTable_;
    TAggrsTable<TAvgCollector> AvgTable_;
    TAggrsTable<TLastCollector> LastTable_;

    TAggrsTable<THistogramCollectors::TSumCollector> HistsTable_;

    TAggrsTable<TSummaryCollectors::TSumCollector> SummarySumTable_;
    TAggrsTable<TSummaryCollectors::TLastCollector> SummaryLastTable_;

    TYasmMetricTransformer Transformer_;

    /// Routes a double value to the table matching the collector type; histogram collectors are rejected.
    void Collect(double value, TLabels labels, EMetricType metricType, EYasmCollectorType collectorType) {
        switch (collectorType) {
            case EYasmCollectorType::AVG: {
                AvgTable_.Collect(value, std::move(labels), metricType);
                break;
            }
            case EYasmCollectorType::LAST: {
                LastTable_.Collect(value, std::move(labels), metricType);
                break;
            }
            case EYasmCollectorType::SUM: {
                SumTable_.Collect(value, std::move(labels), metricType);
                break;
            }
            case EYasmCollectorType::MIN: {
                MinTable_.Collect(value, std::move(labels), metricType);
                break;
            }
            case EYasmCollectorType::MAX: {
                MaxTable_.Collect(value, std::move(labels), metricType);
                break;
            }
            case EYasmCollectorType::LOG_HIST: {
                ythrow TInvalidValueTypeError()
                        << "Unexpected value type for aggregation. Expected type is (LOG)HISTOGRAM, actual type is "
                        << NMonitoring::MetricTypeToStr(metricType) << ". Metric labels:" << labels.AsString();
            }
        }
    }

    /// Routes a summary: LAST goes to the "last" summary table, AVG/SUM/MIN/MAX share
    /// the summing summary table, histogram collectors are rejected.
    void Collect(
            ISummaryDoubleSnapshot* summary,
            TLabels labels,
            EMetricType metricType,
            EYasmCollectorType collectorType)
    {
        switch (collectorType) {
            case EYasmCollectorType::AVG:
            case EYasmCollectorType::SUM:
            case EYasmCollectorType::MIN:
            case EYasmCollectorType::MAX: {
                SummarySumTable_.Collect(summary, std::move(labels), metricType);
                break;
            }
            case EYasmCollectorType::LAST: {
                SummaryLastTable_.Collect(summary, std::move(labels), metricType);
                break;
            }
            case EYasmCollectorType::LOG_HIST: {
                ythrow TInvalidValueTypeError()
                        << "Unexpected value type for aggregation. Expected type is (LOG)HISTOGRAM, actual type is "
                        << NMonitoring::MetricTypeToStr(metricType) << ". Metric labels:" << labels.AsString();
            }
        }
    }

    /// Routes a log-histogram: only the histogram collector is accepted.
    void Collect(
            TLogHistogramSnapshot* logHist,
            TLabels labels,
            EMetricType metricType,
            EYasmCollectorType collectorType)
    {
        switch (collectorType) {
            case EYasmCollectorType::LOG_HIST: {
                HistsTable_.Collect(logHist, std::move(labels), metricType);
                break;
            }
            case EYasmCollectorType::LAST:
                // TODO: is it valid case?
            case EYasmCollectorType::AVG:
            case EYasmCollectorType::SUM:
            case EYasmCollectorType::MIN:
            case EYasmCollectorType::MAX: {
                ythrow TInvalidValueTypeError()
                        << "Invalid aggregation type. Aggregation types (sum, avg, min, max, last) not supported for (LOG)HISTOGRAM metric type"
                        << ". Metric labels:" << labels.AsString();
            }
        }
    }

    /// Routes a histogram: only the histogram collector is accepted.
    void Collect(
            IHistogramSnapshot* hist,
            TLabels labels,
            EMetricType metricType,
            EYasmCollectorType collectorType)
    {
        switch (collectorType) {
            case EYasmCollectorType::LOG_HIST: {
                HistsTable_.Collect(hist, std::move(labels), metricType);
                break;
            }
            case EYasmCollectorType::LAST:
                // TODO: is it valid case?
            case EYasmCollectorType::AVG:
            case EYasmCollectorType::SUM:
            case EYasmCollectorType::MIN:
            case EYasmCollectorType::MAX: {
                // Same error type as the log-histogram overload, so callers can
                // handle both histogram flavours uniformly.
                ythrow TInvalidValueTypeError()
                        << "Invalid aggregation type. Aggregation types (sum, avg, min, max, last) not supported for (LOG)HISTOGRAM metric type"
                        << ". Metric labels:" << labels.AsString();
            }
        }
    }
};

// Size of this object plus, when an implementation is present, the size it reports.
// Aborts if the total reaches 2^34 bytes.
size_t TYasmAggrState::SizeBytes() const {
    size_t res = sizeof(*this);
    if (Impl_) {
        res += Impl_->SizeBytes();
    }
    // Unsigned 64-bit literal: shifting a `long` by 34 is undefined where long is 32 bits wide.
    Y_VERIFY(res < (1ull << 34));
    return res;
}

// Creates the implementation object, handing the aggregation rules over to it.
TYasmAggrState::TYasmAggrState(TVector<TYasmAggrRule> rules)
        : Impl_(MakeHolder<TYasmAggrState::TImpl>(std::move(rules)))
{}

// Collects a double value under the given labels. Forwards to the implementation,
// which takes the labels by value, so they are copied here.
void TYasmAggrState::Collect(double value, const TLabels& labels, EMetricType metricType) {
    Impl_->Collect(value, labels, metricType);
}

// Collects a double value under every label set the transformer produces for `labels`.
// Forwards to the implementation.
void TYasmAggrState::CollectTransform(double value, const TLabels& labels, EMetricType metricType) {
    Impl_->CollectTransform(value, labels, metricType);
}

// Collects a log-histogram snapshot under the given labels. Forwards to the
// implementation, which takes the labels by value, so they are copied here.
void TYasmAggrState::Collect(
        NMonitoring::TLogHistogramSnapshot* value,
        const TLabels& labels,
        NMonitoring::EMetricType metricType)
{
    Impl_->Collect(value, labels, metricType);
}

// Collects a log-histogram snapshot under every label set the transformer produces
// for `labels`. Forwards to the implementation.
void TYasmAggrState::CollectTransform(
        NMonitoring::TLogHistogramSnapshot* value,
        const TLabels& labels,
        NMonitoring::EMetricType metricType)
{
    Impl_->CollectTransform(value, labels, metricType);
}

// Collects a summary snapshot under the given labels. Forwards to the
// implementation, which takes the labels by value, so they are copied here.
void TYasmAggrState::Collect(
        NMonitoring::ISummaryDoubleSnapshot* value,
        const TLabels& labels,
        NMonitoring::EMetricType metricType)
{
    Impl_->Collect(value, labels, metricType);
}

// Collects a summary snapshot under every label set the transformer produces
// for `labels`. Forwards to the implementation.
void TYasmAggrState::CollectTransform(
        NMonitoring::ISummaryDoubleSnapshot* value,
        const TLabels& labels,
        NMonitoring::EMetricType metricType)
{
    Impl_->CollectTransform(value, labels, metricType);
}

// Collects a histogram snapshot under the given labels. Forwards to the
// implementation, which takes the labels by value, so they are copied here.
void TYasmAggrState::Collect(
        NMonitoring::IHistogramSnapshot* value,
        const TLabels& labels,
        NMonitoring::EMetricType metricType)
{
    Impl_->Collect(value, labels, metricType);
}

// Collects a histogram snapshot under every label set the transformer produces
// for `labels`. Forwards to the implementation.
void TYasmAggrState::CollectTransform(
        NMonitoring::IHistogramSnapshot* value,
        const TLabels& labels,
        NMonitoring::EMetricType metricType)
{
    Impl_->CollectTransform(value, labels, metricType);
}

// Releases the contents of the implementation's tables as a consumable aggregates object.
IYasmAggregatesPtr TYasmAggrState::ReleaseAggregates() {
    return Impl_->ReleaseAggregates();
}

// Defined in this file, where TImpl is a complete type.
// NOTE(review): presumably required so the Impl_ holder can destroy TImpl — confirm against the header.
TYasmAggrState::~TYasmAggrState() {}

// Returns a non-owning pointer to the transformer held by the implementation object.
TYasmMetricTransformer* TYasmAggrState::GetTransformer() {
    return Impl_->GetTransformer();
}

} // namespace NSolomon::NIngestor
