#include "base_rows_processor.h"

#include <library/cpp/framing/packer.h>
#include <crypta/graph/rt/events/proto/event.pb.h>

namespace NResharder {

    // Builds the processor from its config section plus the resharding parameters.
    //
    // config           - supplies the splitter settings, message type, output size/chunk limits,
    //                    source and sharding algorithm.
    // reshardingModule - divisor applied to the sharder result in Aggregate().
    // destinationQueue - key under which Aggregate() stores every accepted row.
    // sampleShardsMax  - exclusive upper bound on shard ids accepted by Aggregate().
    //
    // Throws yexception (via Y_ENSURE) unless the configured max output message size
    // is strictly below 8 MiB.
    TBaseRowsProcessor::TBaseRowsProcessor(const TRowsProcessorConfig& config,
                                           const ui32 reshardingModule,
                                           const TString& destinationQueue,
                                           const ui64 sampleShardsMax)
        : Splitter(config.GetSplitter().GetLogfellerSplitter())
        , ReshardingModule(reshardingModule)
        , MessageType(config.GetMessageType())
        , MaxMessageSize(config.GetMaxOutputMessageSize())
        , MaxChunksCount(config.GetMaxOutputChunksCount())
        , Source(config.GetSource())
        , ShardingAlgorithm(MakeSharder(config.GetShardingAlgorithm()))
        , DestinationQueue_(destinationQueue)
        , SampleShardsMax_(sampleShardsMax)
    {
        // Max string column size is 16 MiB; output messages may use at most half of that limit.
        constexpr ui64 kMaxMessageSize{8ull << 20};
        Y_ENSURE(MaxMessageSize < kMaxMessageSize,
                 "MaxOutputMessageSize must be less than " << kMaxMessageSize << " bytes, got " << MaxMessageSize);
    }

    // Always reports true for the base processor.
    // NOTE(review): how the caller interprets "trivial" aggregation is not visible in this file — confirm against the interface.
    bool TBaseRowsProcessor::IsAggregationTrivial() const {
        return true;
    }

    // No-op in the base processor: none of the arguments are read or modified.
    void TBaseRowsProcessor::Process(NSFStats::TSolomonContext&, TInstant, NBigRT::TRowsBatch&) const {
    }

    // Assigns each incoming row a target shard and appends it to dst[DestinationQueue_].
    //
    // The target shard is the sharder result modulo ReshardingModule (which therefore must be
    // non-zero). Rows whose target shard is not below SampleShardsMax_ are dropped. The Solomon
    // context is unused.
    void TBaseRowsProcessor::Aggregate(NSFStats::TSolomonContext& /* ctx */, const NBigRT::TRowsBatch& rows, THashMap<TString, TVector<NBigRT::TAggregatedRow>>& dst) const {
        for (const auto& sourceRow : rows) {
            const ui64 targetShard = ShardingAlgorithm->Shard(sourceRow) % ReshardingModule;
            if (targetShard < SampleShardsMax_) {
                auto aggregated = NBigRT::MakeAggregated(sourceRow);
                aggregated.Shard = targetShard;
                dst[DestinationQueue_].push_back(std::move(aggregated));
            }
        }
    }

    // Serializes every queue found in src into the entry of output that has the same key.
    // An output entry is created for each key of src, even if serialization adds nothing to it.
    void TBaseRowsProcessor::Serialize(NSFStats::TSolomonContext& ctx, THashMap<TString, TVector<NBigRT::TAggregatedRow>>& src, THashMap<TString, TVector<NBigRT::TSerializedRow>>& output) const {
        for (auto& queueEntry : src) {
            const auto& queueName = queueEntry.first;
            auto& queueRows = queueEntry.second;
            SerializeSingleQueue(ctx, queueRows, output[queueName]);
        }
    }

    // Packs the aggregated rows of one destination queue into framed chunks and appends one
    // TSerializedRow per chunk to output.
    //
    // rows is reordered in place (sorted by shard). Every row is converted to an
    // NCrypta::NEvent::TEventMessage and added to the current chunk through NFraming::TPacker.
    // The current chunk is closed before adding a row when:
    //   - the row belongs to another shard, or
    //   - the chunk is non-empty and either already holds MaxChunksCount messages or adding
    //     the message would reach MaxMessageSize.
    // A single message that alone exceeds MaxMessageSize is still emitted, as its own chunk.
    // The SeqNo of a chunk is the SourceOffset of the last row placed into it.
    // The size of every emitted chunk is added to the "output_message_size" metric.
    //
    // Throws yexception if a row's Fields is not a TRowFields.
    void TBaseRowsProcessor::SerializeSingleQueue(NSFStats::TSolomonContext& ctx, TVector<NBigRT::TAggregatedRow>& rows, TVector<NBigRT::TSerializedRow>& output) const {
        if (rows.empty()) {
            return;
        }

        // Stable sort keeps rows of the same shard in their incoming order, so the last row of a
        // chunk (whose SourceOffset becomes the chunk SeqNo) is also the last incoming row of it.
        // NOTE(review): presumably incoming rows are ordered by SourceOffset — confirm against caller.
        StableSort(rows, [](const auto& l, const auto& r) -> bool { return l.Shard < r.Shard; });

        ui64 shard{rows.front().Shard};
        TStringStream stream;
        NFraming::TPacker packer(stream);
        NCrypta::NEvent::TEventMessage message;
        auto outputMessageSize{ctx.Get<NSFStats::TSumMetric<ui64>>("output_message_size")};
        auto currentSourceOffset = rows.front().Meta.SourceOffset;
        size_t chunksCount{0};

        // Moves everything packed so far into a new output row for the current shard and
        // accounts its size in the metric. Does nothing when no data has been packed.
        const auto flushChunk = [&]() {
            packer.Flush();
            if (stream.Str().empty()) {
                return;
            }
            output.push_back(NBigRT::TSerializedRow{
                .Shard = shard,
                .SeqNo = currentSourceOffset,
                .Data = TString{}});
            // Swapping hands the buffer over and leaves the stream empty for the next chunk.
            output.back().Data.swap(stream.Str());
            outputMessageSize.Inc(output.back().Data.size());
            chunksCount = 0;
        };

        // The message object is reused for every row and Source never changes, so set it once.
        if (Source) {
            message.SetSource(Source);
        }

        for (auto& row : rows) {
            const auto* fields = dynamic_cast<const TRowFields*>(row.Fields.Get());
            Y_ENSURE(fields, "Unexpected row fields type: TRowFields expected");

            message.SetTimeStamp(row.TimeStamp.TimeT());
            message.SetStandVersion(fields->StandVersion);
            message.SetCryptaId(fields->CryptaId);
            message.SetType(row.MessageType);

            Y_PROTOBUF_SUPPRESS_NODISCARD row.Message->SerializePartialToString(message.MutableBody());

            // ByteSizeLong() also caches the size that packer.Add() reuses below.
            const size_t messageSize{message.ByteSizeLong()};

            // Try to split chunks by MaxMessageSize (only if one message does not exceed this limit).
            const bool shardChanged = (shard != row.Shard);
            const bool chunkIsFull = (0 != chunksCount) &&
                                     ((chunksCount >= MaxChunksCount) || (stream.Size() + messageSize) >= MaxMessageSize);
            if (shardChanged || chunkIsFull) {
                flushChunk();
                shard = row.Shard;
            }

            currentSourceOffset = row.Meta.SourceOffset;
            packer.Add(message, true /*use cached size*/);
            ++chunksCount;
        }

        // Emit the trailing chunk; it is counted in the metric like every other chunk.
        flushChunk();
    }

}
