#include "compact.h"

#include <infra/yasm/histdb/components/streams/snappy.h>
#include <infra/yasm/zoom/components/compression/bit_stream.h>
#include <infra/yasm/zoom/components/compression/chunk.h>
#include <infra/yasm/zoom/components/compression/counted_sum.h>
#include <infra/yasm/zoom/components/compression/histograms.h>
#include <infra/yasm/zoom/components/compression/zoom_converters.h>
#include <infra/monitoring/common/msgpack.h>

#include <library/cpp/blockcodecs/codecs.h>

#include <util/ysaveload.h>
#include <util/stream/buffer.h>

using namespace NHistDb;
using namespace NTags;
using namespace NZoom::NSignal;
using namespace NZoom::NYasmConf;
using namespace NZoom::NValue;
using namespace NMonitoring;
using namespace NYasmServer;

namespace {
    // Default-constructed value; DecodeChunks uses it to pad gaps before a chunk's first timestamp.
    static const NZoom::NValue::TValue EMPTY_VALUE;

    // Upper bound on how many instance keys one request key may resolve to in ReadFiltered.
    static constexpr size_t MAX_INSTANCES_TO_READ = 1000;
    // Version stamp returned by TCompactFormat::GetCompactVersion().
    static constexpr THeaderVersionStorage HEADER_COMPACT_VERSION {{10, 0, 0, 0}};
    // Time budget of one ReadFiltered call; checked before every record read in ReadRecords.
    static constexpr TDuration READ_TIMEOUT = TDuration::Seconds(25);

    inline size_t GetUncompressedSize(const TVector<TSnappyBlock>& blocks) noexcept {
        size_t size = 0;
        for (const auto& block : blocks) {
            size += block.UncompressedSize;
        }
        return size;
    }

    // Converts an optional blob into an optional string view over the blob's bytes.
    // The view is built from the data pointer of the by-value copy of the blob.
    // NOTE(review): presumably the caller's blob shares and keeps that storage alive — confirm.
    inline TMaybe<TStringBuf> BlobToBuf(TMaybe<TBlob> incoming) {
        if (!incoming) {
            return Nothing();
        }
        return TStringBuf(incoming->AsCharPtr(), incoming->Size());
    }

    // Fixed-size header stored in front of every chunk: written with SavePodType in
    // Append and read back with LoadPodType in ReadRecords.
    // NOTE(review): being saved as a POD, the field order/types presumably define the
    // on-disk layout — confirm before changing anything here.
    struct TSeriesDescriptor {
        // Index of the signal in KnownSignals.
        ui32 Signal;
        // Index of the first value of the chunk on the time axis.
        ui16 Timestamp;
        // Number of values covered by the chunk.
        ui16 ValuesCount;
        // Encoding kind handed to DecodeChunk.
        ESeriesKind SeriesKind;
    };

    // Write-side state of one instance between Start() and Commit(): records are
    // serialized into Buffer through BufferStream and flushed to the output on Commit().
    struct TTagState {
        TTagState(TInstanceKey instanceKey, size_t offset)
            : InstanceKey(instanceKey)
            , Offset(offset)
            , BufferStream(Buffer)
        {
        }

        const TInstanceKey InstanceKey;
        // Position of the output stream captured when the instance was started.
        const size_t Offset;
        TString Buffer;
        // Writes into Buffer; declared after Buffer so Buffer is constructed first.
        TStringOutput BufferStream;
    };

    // One encoded chunk of a signal together with the descriptor that was stored with it.
    struct TSignalChunk {
        TSeriesDescriptor Descriptor;
        // Encoded payload; decoded by DecodeChunks.
        TString Data;
    };

    // All chunks collected for a single signal of a single instance, in read order.
    struct TRecordState {
        TVector<TSignalChunk> Chunks;
    };

    // Result of reading one instance from the stream (see ReadRecords).
    struct TInstanceState {
        // Every timestamp index covered by any chunk of the instance, requested signal or not.
        THashSet<TAbstractFormat::TTimestamp> Matches;
        // Chunks of the requested signals only, keyed by signal name.
        THashMap<TSignalName, TRecordState> Records;
    };

    // Per-call cache in ReadFiltered: each instance key is read from the stream at most once.
    using TInstanceStateMap = THashMap<TInstanceKey, TInstanceState>;

    // Decodes the chunks of one signal into a dense vector indexed by timestamp.
    // Gaps between the end of the already decoded values and the start of the next
    // chunk are padded with copies of EMPTY_VALUE.
    //
    // Chunks must be ordered by start timestamp and must not overlap. A chunk that
    // starts before the current end used to make the unsigned padding count wrap
    // around (an effectively endless loop); it is now reported as yexception.
    TVector<NZoom::NValue::TValue> DecodeChunks(const TVector<TSignalChunk>& chunks) {
        TVector<NZoom::NValue::TValue> values;
        for (const auto& chunk : chunks) {
            const size_t start = chunk.Descriptor.Timestamp;
            if (start < values.size()) {
                ythrow yexception() << "chunk starting at " << start
                                    << " overlaps " << values.size() << " already decoded values";
            }
            // Pad the gap so that the chunk's first value lands at index `start`.
            for (size_t filled = values.size(); filled < start; ++filled) {
                values.emplace_back(EMPTY_VALUE.GetValue());
            }
            for (auto&& value : NYasmServer::DecodeChunk(chunk.Data, chunk.Descriptor.SeriesKind, chunk.Descriptor.ValuesCount)) {
                values.emplace_back(std::move(value));
            }
        }
        return values;
    }
}

class TCompactFormat::TImpl {
public:
    TImpl() {
    }

    // Restores the format from a header produced by Dump(); an undefined `incoming`
    // yields an empty format.
    //
    // The header is a codec-compressed msgpack array of five sections:
    // blocks, size offsets, signals, series and timestamps.
    // Throws yexception when the array has fewer than five sections (previously the
    // missing sections were read out of bounds); the Load* helpers and msgpack may
    // throw on other malformed input.
    TImpl(TMaybe<TStringBuf> incoming) {
        if (!incoming.Defined()) {
            return;
        }

        const auto decoded(GetCodec().Decode(*incoming));
        const auto message(msgpack::unpack(decoded.data(), decoded.size()));

        NMonitoring::EnsureIs(message.get(), msgpack::type::object_type::ARRAY);
        const auto root(message.get().via.array);
        if (root.size < 5) {
            ythrow yexception() << "compact header has " << root.size << " sections, 5 expected";
        }

        // Blocks go first: LoadSizes derives TotalSize from the loaded blocks.
        LoadBlocks(root.ptr[0]);
        LoadSizes(root.ptr[1]);
        LoadSignals(root.ptr[2]);
        LoadSeries(root.ptr[3]);
        LoadTimestamps(root.ptr[4]);
    }

    // Reads the requested signals for the requested timestamps.
    //
    // times   - timestamp indexes to report.
    // request - list of (request key, signal names) pairs; each pair is processed independently.
    // stream  - data stream the records are read from.
    //
    // For every request key the matching instance keys are resolved among the known
    // series, each instance is read from the stream at most once per call, and one
    // TReadData is emitted per (instance, timestamp) for timestamps present in at
    // least one of the resolved instances.
    //
    // Throws TSignalLimitExceeded when a request key resolves to more than
    // MAX_INSTANCES_TO_READ instances; ReadRecords may throw TTimeLimitExceeded
    // once READ_TIMEOUT has elapsed.
    TVector<TReadData> ReadFiltered(const TVector<TTimestamp>& times, const TTagSignals& request, TSnappyInputStream& stream) const {
        // Union of signals over all request keys: every instance is read once with the full set.
        const TVector<TSignalName> allSignals(CollectSignalNames(request));

        const auto deadline = READ_TIMEOUT.ToDeadLine();
        // Cache of already read instances, shared between request keys.
        TInstanceStateMap instanceStates;
        TVector<TReadData> result;

        for (const auto requestKeyIndex : xrange(request.size())) {
            const auto& requestKeySignals = request[requestKeyIndex];

            // Resolve the request key against every instance key known to the header.
            NTags::TDynamicFilter filter(requestKeySignals.first);
            for (const auto& instanceKeyOffsets : Series) {
                filter.Feed(instanceKeyOffsets.first);
            }

            auto resolvedInstanceKeys = filter.Resolve();
            if (resolvedInstanceKeys.empty()) {
                continue;
            }

            if (resolvedInstanceKeys.size() > MAX_INSTANCES_TO_READ) {
                ythrow TSignalLimitExceeded() << "can't read more than " << MAX_INSTANCES_TO_READ
                                              << " instances per query " << requestKeySignals.first
                                              << " (" << resolvedInstanceKeys.size() << " to read)";
            }

            // Timestamps that have data in at least one of the resolved instances.
            THashSet<TTimestamp> matchesSet;
            for (const auto& instanceKey : resolvedInstanceKeys) {
                TInstanceStateMap::insert_ctx ctx;
                auto it(instanceStates.find(instanceKey, ctx));
                if (it == instanceStates.end()) {
                    it = instanceStates.emplace_direct(ctx, instanceKey, ReadRecords(instanceKey, allSignals, stream, deadline));
                }
                matchesSet.insert(it->second.Matches.begin(), it->second.Matches.end());
            }

            if (matchesSet.empty()) {
                continue;
            }

            for (const auto& instanceKey : resolvedInstanceKeys) {
                auto& records(instanceStates[instanceKey].Records);

                // Decode only the signals of this request key; operator[] creates an
                // empty record (and thus an empty series) for signals without chunks.
                TVector<std::pair<TSignalName, TVector<TValue>>> series;
                for (const auto signal : requestKeySignals.second) {
                    series.emplace_back(signal, DecodeChunks(records[signal].Chunks));
                }

                for (const auto timestamp : times) {
                    if (!matchesSet.contains(timestamp)) {
                        continue;
                    }

                    TVector<std::pair<TSignalName, TValue>> values;
                    for (auto& signalValues : series) {
                        // Signals whose decoded series ends before this timestamp are omitted.
                        if (timestamp < signalValues.second.size()) {
                            // NOTE(review): the value is moved out of `series`, so a timestamp
                            // repeated in `times` would see a moved-from value on its second
                            // occurrence — confirm callers pass unique timestamps.
                            values.emplace_back(signalValues.first, std::move(signalValues.second[timestamp]));
                        }
                    }

                    result.emplace_back(
                        requestKeySignals.first,
                        std::move(values),
                        timestamp,
                        instanceKey,
                        requestKeyIndex
                    );
                }
            }
        }

        return result;
    }

    // Begins writing records of `instanceKey`: remembers the current position of
    // `stream` as the record offset and starts from a working copy of the committed
    // timestamp flags (published again by Commit()).
    void Start(TInstanceKey instanceKey, TSnappyOutputStream& stream) {
        CurrentTagState.ConstructInPlace(instanceKey, stream.Position());
        CurrentTimestamps = Timestamps;
    }

    // Appends one encoded chunk of `signalName` to the instance opened by Start().
    //
    // timestamp   - index of the first value of the chunk; must be below the ui16 maximum.
    // valuesCount - number of values in the chunk; must be below the ui16 maximum.
    // seriesKind  - encoding kind stored in the descriptor.
    // chunk       - encoded payload, stored right after the descriptor.
    //
    // Every timestamp in [timestamp, timestamp + valuesCount) is flagged as having
    // data. Previously the flags were only set while the vector was growing, so a
    // chunk covering an already allocated range left its slots unset.
    void Append(
        TSignalName signalName,
        TTimestamp timestamp,
        size_t valuesCount,
        ESeriesKind seriesKind,
        const TString& chunk
    ) {
        Y_VERIFY(timestamp < std::numeric_limits<ui16>::max());
        Y_VERIFY(valuesCount < std::numeric_limits<ui16>::max());

        const TSeriesDescriptor descriptor {
            .Signal = GetSignalOffset(signalName),
            .Timestamp = static_cast<ui16>(timestamp),
            .ValuesCount = static_cast<ui16>(valuesCount),
            .SeriesKind = seriesKind
        };

        ::SavePodType(&CurrentTagState->BufferStream, descriptor);
        ::SaveLoad(&CurrentTagState->BufferStream, chunk);

        const size_t begin = timestamp;
        const size_t end = begin + valuesCount;
        // Grow with unset flags first; slots before `begin` are a gap without data.
        if (CurrentTimestamps.size() < end) {
            CurrentTimestamps.resize(end, false);
        }
        for (size_t index = begin; index < end; ++index) {
            CurrentTimestamps[index] = true;
        }
    }

    // Finishes the instance opened by Start(): flushes the buffered records to
    // `stream`, registers the record offset for the instance key and for size
    // lookup, and publishes the updated timestamp flags.
    void Commit(TSnappyOutputStream& stream) {
        CurrentTagState->BufferStream.Finish();
        stream.Write(CurrentTagState->Buffer);
        Series[CurrentTagState->InstanceKey].emplace_back(CurrentTagState->Offset);
        // FindSize binary-searches SizeOffsets.
        // NOTE(review): this relies on offsets being appended in non-decreasing order — confirm.
        SizeOffsets.emplace_back(CurrentTagState->Offset);
        CurrentTagState.Clear();
        // The working copy becomes the committed state.
        CurrentTimestamps.swap(Timestamps);
    }

    // Serializes the header: a msgpack array of five sections (blocks, size offsets,
    // signals, series, timestamps) compressed with the codec from GetCodec().
    // The section order mirrors what the loading constructor reads.
    TString Dump() {
        msgpack::sbuffer packed;
        msgpack::packer<msgpack::sbuffer> writer(packed);

        writer.pack_array(5);
        SaveBlocks(writer);
        SaveSizes(writer);
        SaveSignals(writer);
        SaveSeries(writer);
        SaveTimestamps(writer);

        const TStringBuf raw(packed.data(), packed.size());
        return GetCodec().Encode(raw);
    }

    void SaveBlocks(const TVector<TSnappyBlock>& blocks) {
        Blocks = blocks;
    }

    // Returns the stored snappy block list (set by SaveBlocks or loaded from a header).
    const TVector<TSnappyBlock>& GetBlocks() const {
        return Blocks;
    }

    // Returns the index of the first set timestamp flag, or Nothing() when no flag is set.
    TMaybe<TTimestamp> FirstRecordTime() const {
        for (size_t position = 0; position != Timestamps.size(); ++position) {
            if (!Timestamps[position]) {
                continue;
            }
            return position;
        }
        return Nothing();
    }

    // Returns the index of the last timestamp slot, or Nothing() when there are no slots.
    // The flag stored in that slot is not inspected.
    TMaybe<TTimestamp> LastRecordTime() const {
        if (Timestamps.empty()) {
            return Nothing();
        }
        return Timestamps.size() - 1;
    }

private:
    // Codec used for both Dump() and header loading; looked up by name on every call.
    const NBlockCodecs::ICodec& GetCodec() const {
        return *NBlockCodecs::Codec("zstd08_1");
    }

    size_t FindSize(size_t offset) const {
        const auto it(LowerBound(SizeOffsets.begin(), SizeOffsets.end(), offset));
        if (it != SizeOffsets.end() && *it == offset) {
            const auto nextIt(it + 1);
            if (nextIt != SizeOffsets.end()) {
                return *nextIt - *it;
            } else if (nextIt == SizeOffsets.end()) {
                return TotalSize - *it;
            }
        }
        ythrow yexception() << "can't find given offset";
    }

    // Returns the index of `signal` in KnownSignals, registering the signal under
    // the next free index when it has not been seen before.
    ui32 GetSignalOffset(TSignalName signal) {
        THashMap<TSignalName, size_t>::insert_ctx insertPoint;
        const auto known(KnownSignalsOffsets.find(signal, insertPoint));
        if (known == KnownSignalsOffsets.end()) {
            KnownSignals.emplace_back(signal);
            // The map has not grown yet, so its size is the index of the new signal.
            const size_t newIndex = KnownSignalsOffsets.size();
            return KnownSignalsOffsets.emplace_direct(insertPoint, signal, newIndex)->second;
        }
        return known->second;
    }

    // Packs Blocks as an array of [UncompressedSize, CompressedSize] pairs.
    void SaveBlocks(msgpack::packer<msgpack::sbuffer>& packer) const {
        packer.pack_array(Blocks.size());
        for (auto it = Blocks.begin(); it != Blocks.end(); ++it) {
            packer.pack_array(2);
            packer.pack_uint64(it->UncompressedSize);
            packer.pack_uint64(it->CompressedSize);
        }
    }

    // Packs SizeOffsets as an array of unsigned 64-bit integers, in stored order.
    void SaveSizes(msgpack::packer<msgpack::sbuffer>& packer) const {
        const size_t count = SizeOffsets.size();
        packer.pack_array(count);
        for (size_t index = 0; index < count; ++index) {
            packer.pack_uint64(SizeOffsets[index]);
        }
    }

    // Packs the names of KnownSignals as an array of strings; the array position
    // is the signal index stored in chunk descriptors.
    void SaveSignals(msgpack::packer<msgpack::sbuffer>& packer) const {
        packer.pack_array(KnownSignals.size());
        for (const auto& known : KnownSignals) {
            PackString(packer, known.GetName());
        }
    }

    // Packs Series as a map from the named form of the instance key to the array
    // of its record offsets.
    // Declared const like the other Save* helpers: it only reads the state.
    void SaveSeries(msgpack::packer<msgpack::sbuffer>& packer) const {
        packer.pack_map(Series.size());
        for (const auto& tagOffsets : Series) {
            PackString(packer, tagOffsets.first.ToNamed());
            packer.pack_array(tagOffsets.second.size());
            for (const auto offset : tagOffsets.second) {
                packer.pack_uint64(offset);
            }
        }
    }

    // Packs the committed timestamp flags as an array of msgpack booleans.
    // Declared const like the other Save* helpers: it only reads the state.
    void SaveTimestamps(msgpack::packer<msgpack::sbuffer>& packer) const {
        packer.pack_array(Timestamps.size());
        for (const auto flag : Timestamps) {
            if (flag) {
                packer.pack_true();
            } else {
                packer.pack_false();
            }
        }
    }

    // Loads Blocks from an array of [UncompressedSize, CompressedSize] pairs.
    // Each entry is now checked to be an array with at least two elements before
    // it is indexed; a shorter entry raises yexception. EnsureIs reports a wrong
    // msgpack type (its exception type is defined by NMonitoring).
    void LoadBlocks(const msgpack::object blocks) {
        NMonitoring::EnsureIs(blocks, msgpack::type::object_type::ARRAY);
        const auto incoming(blocks.via.array);
        Blocks.resize(incoming.size);
        auto it(Blocks.begin());
        for (const auto& block : NMonitoring::TArrayIterator(incoming)) {
            NMonitoring::EnsureIs(block, msgpack::type::object_type::ARRAY);
            const auto element(block.via.array);
            if (element.size < 2) {
                ythrow yexception() << "snappy block entry has " << element.size << " fields, 2 expected";
            }
            it->UncompressedSize = element.ptr[0].via.u64;
            it->CompressedSize = element.ptr[1].via.u64;
            ++it;
        }
    }

    // Loads SizeOffsets from an array of unsigned integers and recomputes TotalSize
    // from the blocks loaded so far (LoadBlocks has to run first).
    void LoadSizes(const msgpack::object sizes) {
        NMonitoring::EnsureIs(sizes, msgpack::type::object_type::ARRAY);
        const auto packedOffsets(sizes.via.array);
        SizeOffsets.resize(packedOffsets.size);

        size_t slot = 0;
        for (const auto& packedOffset : NMonitoring::TArrayIterator(packedOffsets)) {
            SizeOffsets[slot] = packedOffset.via.u64;
            ++slot;
        }

        TotalSize = GetUncompressedSize(Blocks);
    }

    // Loads the signal table: KnownSignals in array order plus the reverse
    // name -> index mapping.
    void LoadSignals(const msgpack::object tags) {
        NMonitoring::EnsureIs(tags, msgpack::type::object_type::ARRAY);
        const auto packedNames(tags.via.array);
        KnownSignals.reserve(packedNames.size);
        KnownSignalsOffsets.reserve(packedNames.size);
        for (const auto& packedName : NMonitoring::TArrayIterator(packedNames)) {
            // Index the new signal will occupy once appended.
            const size_t index = KnownSignals.size();
            KnownSignals.emplace_back(packedName.as<TStringBuf>());
            KnownSignalsOffsets.emplace(KnownSignals.back(), index);
        }
    }

    // Loads Series from a map of named instance key -> array of record offsets.
    // Every map value is now checked to be an array before its elements are read;
    // EnsureIs reports a wrong msgpack type (its exception type is defined by
    // NMonitoring).
    void LoadSeries(const msgpack::object series) {
        NMonitoring::EnsureIs(series, msgpack::type::object_type::MAP);
        const auto incoming(series.via.map);
        Series.reserve(incoming.size);
        for (const auto& tagOffsets : NMonitoring::TMapIterator(incoming)) {
            NMonitoring::EnsureIs(tagOffsets.val, msgpack::type::object_type::ARRAY);
            const auto packedOffsets(tagOffsets.val.via.array);

            TVector<size_t> offsets;
            offsets.reserve(packedOffsets.size);
            for (const auto& offset : NMonitoring::TArrayIterator(packedOffsets)) {
                offsets.emplace_back(offset.via.u64);
            }
            Series.emplace(
                TInstanceKey::FromNamed(tagOffsets.key.as<TStringBuf>()),
                std::move(offsets)
            );
        }
    }

    // Appends the timestamp flags from an array of msgpack booleans to Timestamps.
    void LoadTimestamps(const msgpack::object timestamps) {
        NMonitoring::EnsureIs(timestamps, msgpack::type::object_type::ARRAY);
        const auto packedFlags(timestamps.via.array);
        Timestamps.reserve(packedFlags.size);
        for (const auto& packedFlag : NMonitoring::TArrayIterator(packedFlags)) {
            const bool hasRecord = packedFlag.via.boolean;
            Timestamps.push_back(hasRecord);
        }
    }

    // Reads every record of `instanceKey` from `stream`.
    //
    // signals  - signals whose chunks are kept; chunks of other signals only
    //            contribute to the set of covered timestamps.
    // deadline - checked before each record read.
    //
    // Returns the covered timestamps (Matches) and the chunks of the requested
    // signals (Records); an unknown instance key yields an empty state.
    //
    // Throws TTimeLimitExceeded when the deadline has passed, and yexception when
    // a record offset is unknown (FindSize) or a descriptor references a signal
    // index outside KnownSignals (previously an out-of-bounds read).
    TInstanceState ReadRecords(const TInstanceKey instanceKey, const TVector<TSignalName>& signals,
                               TSnappyInputStream& stream, TInstant deadline) const {
        TInstanceState result;
        const auto it(Series.find(instanceKey));
        if (it == Series.end()) {
            return result;
        }

        THashSet<TSignalName> signalSet;
        signalSet.insert(signals.begin(), signals.end());

        // Reused between records to avoid reallocating for every offset.
        TBuffer dataBuffer;
        for (const auto offset : it->second) {
            if (deadline < TInstant::Now()) {
                ythrow TTimeLimitExceeded() << "no time left";
            }
            dataBuffer.Resize(FindSize(offset));
            stream.Seek(offset);
            // NOTE(review): the number of bytes actually read is not checked — confirm
            // that TSnappyInputStream::Read always fills the whole buffer.
            stream.Read(dataBuffer.Data(), dataBuffer.Size());
            TBufferInput bufferStream(dataBuffer);
            while (true) {
                TSeriesDescriptor descriptor;
                TString chunk;
                try {
                    ::LoadPodType(&bufferStream, descriptor);
                    ::SaveLoad(&bufferStream, chunk);
                } catch(const TLoadEOF&) {
                    // End of the record buffer: no more (descriptor, chunk) pairs.
                    break;
                }

                // The index comes from disk; validate it before touching any state.
                if (descriptor.Signal >= KnownSignals.size()) {
                    ythrow yexception() << "record references unknown signal index " << descriptor.Signal;
                }

                const size_t startTime(descriptor.Timestamp);
                for (const auto ts : xrange(startTime, startTime + descriptor.ValuesCount)) {
                    result.Matches.insert(ts);
                }

                const TSignalName signal(KnownSignals[descriptor.Signal]);
                if (signalSet.contains(signal)) {
                    // `chunk` is not used after this point, so hand its buffer over.
                    result.Records[signal].Chunks.emplace_back(TSignalChunk{
                        .Descriptor=descriptor,
                        .Data=std::move(chunk)
                    });
                }
            }
        }

        return result;
    }

    // Returns the distinct signal names mentioned by any request key; the order
    // follows the iteration order of the intermediate hash set.
    TVector<TSignalName> CollectSignalNames(const TTagSignals& request) const {
        THashSet<TSignalName> distinct;
        for (const auto& entry : request) {
            const auto& names = entry.second;
            distinct.insert(names.begin(), names.end());
        }
        return TVector<TSignalName>(distinct.begin(), distinct.end());
    }

    // Snappy block sizes of the data stream; set by SaveBlocks or loaded from the header.
    TVector<TSnappyBlock> Blocks;
    // Instance key -> offsets of its committed records (stream positions captured in Start).
    THashMap<TInstanceKey, TVector<size_t>> Series;

    // Signal names in index order; chunk descriptors store the index.
    TVector<TSignalName> KnownSignals;
    // Reverse mapping: signal name -> index in KnownSignals.
    THashMap<TSignalName, size_t> KnownSignalsOffsets;

    // Start offsets of all committed records in commit order; FindSize binary-searches it.
    TVector<ui64> SizeOffsets;
    // Total uncompressed size, derived from Blocks via GetUncompressedSize.
    ui64 TotalSize = 0;

    // State of the instance currently being written (between Start and Commit).
    TMaybe<TTagState> CurrentTagState;
    // Working copy of Timestamps, modified by Append and published by Commit.
    TVector<bool> CurrentTimestamps;

    // Committed per-timestamp flags; a set flag marks a timestamp covered by some chunk.
    TVector<bool> Timestamps;
};

// Creates an empty format.
TCompactFormat::TCompactFormat()
    : Impl(MakeHolder<TImpl>())
{
}

// Restores the format from a dumped header, or creates an empty one when
// `incoming` is undefined.
TCompactFormat::TCompactFormat(TMaybe<TStringBuf> incoming)
    : Impl(MakeHolder<TImpl>(incoming))
{
}

// Restores the format from a dumped header.
TCompactFormat::TCompactFormat(TStringBuf incoming)
    : TCompactFormat(MakeMaybe<TStringBuf>(incoming))
{
}

// Same as the TMaybe<TStringBuf> overload, viewing the bytes of the blob.
TCompactFormat::TCompactFormat(TMaybe<TBlob> incoming)
    : TCompactFormat(BlobToBuf(incoming))
{
}

// Restores the format from a header stored in a blob.
TCompactFormat::TCompactFormat(TBlob incoming)
    : TCompactFormat(MakeMaybe<TBlob>(incoming))
{
}

// Defined in this translation unit, where TImpl is a complete type.
TCompactFormat::~TCompactFormat() {
}

// Stores the snappy block list to be written into the header; forwards to TImpl.
void TCompactFormat::SaveBlocks(const TVector<TSnappyBlock>& blocks) {
    Impl->SaveBlocks(blocks);
}

// Returns the stored snappy block list; forwards to TImpl.
const TVector<TSnappyBlock>& TCompactFormat::GetBlocks() const {
    return Impl->GetBlocks();
}

// Begins writing records of `instanceKey` at the current position of `stream`;
// forwards to TImpl.
void TCompactFormat::Start(TInstanceKey instanceKey, TSnappyOutputStream& stream) {
    Impl->Start(instanceKey, stream);
}

// Appends one encoded chunk to the instance opened by Start(); `offset` is passed
// to TImpl::Append as the timestamp of the chunk's first value.
void TCompactFormat::Append(
    TSignalName signalName,
    TTimestamp offset,
    size_t valuesCount,
    ESeriesKind seriesKind,
    const TString& chunk
) {
    Impl->Append(signalName, offset, valuesCount, seriesKind, chunk);
}

// Flushes the instance opened by Start() to `stream` and registers it; forwards to TImpl.
void TCompactFormat::Commit(TSnappyOutputStream& stream) {
    Impl->Commit(stream);
}

// Returns the serialized header; forwards to TImpl::Dump.
TString TCompactFormat::Dump() const {
    return Impl->Dump();
}

// Returns the first timestamp that has data, if any; forwards to TImpl.
TMaybe<TAbstractFormat::TTimestamp> TCompactFormat::FirstRecordTime() const {
    return Impl->FirstRecordTime();
}

// Returns the last timestamp slot, if any; forwards to TImpl.
TMaybe<TAbstractFormat::TTimestamp> TCompactFormat::LastRecordTime() const {
    return Impl->LastRecordTime();
}

// Reads the requested signals for the requested timestamps from `stream`;
// forwards to TImpl::ReadFiltered.
TVector<TCompactFormat::TReadData> TCompactFormat::Read(
    const TVector<TTimestamp>& times,
    const TTagSignals& tags,
    TSnappyInputStream& stream
) const {
    return Impl->ReadFiltered(times, tags, stream);
}

// Returns the version of this format; same value as GetCompactVersion().
THeaderVersionStorage TCompactFormat::GetVersion() const {
    return GetCompactVersion();
}

// Returns the version stamp of the compact format (HEADER_COMPACT_VERSION).
THeaderVersionStorage TCompactFormat::GetCompactVersion() {
    return HEADER_COMPACT_VERSION;
}
