#include "rows_processor.h"
#include "parsers.h"
#include "algo.h"

#include <functional>

#include <ads/bsyeti/big_rt/lib/utility/logging/logging.h>

#include <library/cpp/framing/packer.h>
#include <library/cpp/json/json_reader.h>
#include <library/cpp/logger/global/global.h>
#include <library/cpp/protobuf/util/traits.h>
#include <library/cpp/safe_stats/safe_stats.h>
#include <logfeller/lib/chunk_splitter/chunk_splitter.h>
#include <util/generic/algorithm.h>
#include <util/generic/vector.h>
#include <util/generic/xrange.h>
#include <util/generic/yexception.h>
#include <util/stream/str.h>
#include <util/string/builder.h>
#include <util/string/cast.h>

#include <crypta/graph/rt/events/events.h>
#include <crypta/graph/rt/events/proto/event.pb.h>
#include <crypta/graph/rt/events/proto/fp.pb.h>
#include <crypta/graph/rt/fp/rows_processor/proto/config.pb.h>

namespace {
    // Upper bound for the configured output message size: the maximum string
    // column size is 16 MiB, and only half of that limit is allowed here.
    constexpr ui64 kMAX_MESSGE_SIZE = 8ull << 20;

    using namespace NResharder;

    // @brief create a row parser according to configuration
    // @param config     parser section of the rows processor configuration
    // @param protoType  event message type whose protobuf descriptor drives the parser
    // @return parser produced by MakeParser for that descriptor
    // @note fails via Y_ENSURE when MakeMessage yields no message for protoType
    NRtSklejka::IRowParserPtr MakeRowParser(const TRowsProcessorConfig::TParser& config, NCrypta::NEvent::EMessageType protoType) {
        // A message instance is created only to obtain its descriptor.
        auto msg = NCrypta::NEvent::MakeMessage(protoType);
        Y_ENSURE(msg);
        const auto* descriptor = msg->GetDescriptor();
        return MakeParser(descriptor, config);
    }

    // Rows processor that splits an incoming message into records, parses every record
    // into a row, assigns each row to a shard and packs the rows of one shard into
    // framed output chunks.
    class TRowsProcessor: public NBigRT::IRowsProcessor {
    public:
        // @brief build a processor from configuration
        // @param config            splitter, parser, sharding and output limit settings
        // @param reshardingModule  modulus applied to the sharding algorithm result; must be non-zero
        // @param destinationQueue  key under which Aggregate() stores the aggregated rows
        // @note fails via Y_ENSURE when the configured output message size is not below
        //       kMAX_MESSGE_SIZE or when reshardingModule is zero
        TRowsProcessor(const TRowsProcessorConfig& config,
                       const ui32 reshardingModule,
                       const TString& destinationQueue)
            : Splitter(config.GetSplitter().GetLogfellerSplitter())
            , RowParser(MakeRowParser(config.GetParser(), config.GetMessageType()))
            , ReshardingModule(reshardingModule)
            , MessageType(config.GetMessageType())
            , MaxMessageSize(config.GetMaxOutputMessageSize())
            , MaxChunksCount(config.GetMaxOutputChunksCount())
            , Source(config.GetSource())
            , ShardingAlgorithm(MakeSharder(config.GetShardingAlgorithm()))
            , DestinationQueue_(destinationQueue)
        {
            Y_ENSURE(MaxMessageSize < kMAX_MESSGE_SIZE);
            // Aggregate() computes `Shard(row) % ReshardingModule`; reject a zero modulus
            // here instead of dividing by zero on the first row.
            Y_ENSURE(ReshardingModule > 0, "resharding module must be positive");
        }

        // @brief split `message` into records and parse each of them into `rows`
        // Records whose parsing throws are skipped; the number of skipped records is
        // reported to `ctx` both in total and per failure kind.
        void Parse(NSFStats::TSolomonContext& ctx, const NBigRT::TRowMeta& meta, TStringBuf message,
                   NBigRT::TRowsBatch& rows) const override {
            NLogFeller::NChunkSplitter::TRecordContext context;
            TStringBuf record, skip;

            ui64 skippedByError{0};
            ui64 skippedByFormatError{0};
            ui64 skippedByIdsError{0};
            ui64 skippedByFpsError{0};
            ui64 skippedByIspError{0};
            ui64 skippedByDebounceError{0};
            ui64 skippedBySamplingError{0};

            auto splitter{NLogFeller::NChunkSplitter::CreateChunkSplitter(Splitter)};
            for (auto iterator = splitter->CreateIterator(message); iterator.NextRecord(record, skip, context);) {
                try {
                    ParseRecord(record, rows, meta, context);
                } catch (const TInvalidFormatError&) {
                    ++skippedByFormatError;
                } catch (const TNoIdsError&) {
                    ++skippedByIdsError;
                } catch (const TNoFpsError&) {
                    ++skippedByFpsError;
                } catch (const TIspError&) {
                    ++skippedByIspError;
                } catch (const TDebounceError&) {
                    ++skippedByDebounceError;
                } catch (const TSamplingError&) {
                    ++skippedBySamplingError;
                } catch (...) {
                    // Any other failure: skip the whole record and log it for diagnostics.
                    ++skippedByError;
                    DEBUG_LOG << "Skipped: type=" << static_cast<i64>(MessageType)
                              << ", record='" << record << "', error=" << CurrentExceptionMessage() << '\n';
                }
            }

            // The total covers every per-kind counter reported below.
            // NOTE(review): debounce skips used to be left out of this sum while sampling
            // skips were included; confirm no dashboard relied on the old value.
            const ui64 skippedTotal = skippedByFormatError + skippedByIdsError + skippedByFpsError
                + skippedByIspError + skippedByDebounceError + skippedBySamplingError + skippedByError;

            ctx.Get<NSFStats::TSumMetric<ui64>>("rows_count_skipped").Inc(skippedTotal);
            ctx.Get<NSFStats::TSumMetric<ui64>>("rows_count_skipped_by_error").Inc(skippedByError);
            ctx.Get<NSFStats::TSumMetric<ui64>>("rows_count_skipped_by_format_error").Inc(skippedByFormatError);
            ctx.Get<NSFStats::TSumMetric<ui64>>("rows_count_skipped_by_fps_error").Inc(skippedByFpsError);
            ctx.Get<NSFStats::TSumMetric<ui64>>("rows_count_skipped_by_ids_error").Inc(skippedByIdsError);
            ctx.Get<NSFStats::TSumMetric<ui64>>("rows_count_skipped_by_debounce_error").Inc(skippedByDebounceError);
            ctx.Get<NSFStats::TSumMetric<ui64>>("rows_count_skipped_by_sampling_error").Inc(skippedBySamplingError);
            ctx.Get<NSFStats::TSumMetric<ui64>>("rows_count_skipped_by_isp_error").Inc(skippedByIspError);
        }

        // @brief no per-batch processing is performed by this processor
        void Process(NSFStats::TSolomonContext& /* ctx */, TInstant /* now */, NBigRT::TRowsBatch& /* rows */) const override {
        }

        bool IsAggregationTrivial() const override {
            return true;
        }

        // @brief assign a shard to every row and append it to `dst` under the destination queue
        void Aggregate(NSFStats::TSolomonContext& /* ctx */, const NBigRT::TRowsBatch& rows, THashMap<TString, TVector<NBigRT::TAggregatedRow>>& dst) const override {
            for (const auto& row : rows) {
                // ReshardingModule is checked to be non-zero in the constructor.
                ui64 shardId = ShardingAlgorithm->Shard(row) % ReshardingModule;
                auto aggregatedRow = NBigRT::MakeAggregated(row);
                aggregatedRow.Shard = shardId;
                dst[DestinationQueue_].push_back(std::move(aggregatedRow));
            }
        }

        // @brief serialize aggregated rows of every queue in `src` into `output[queue]`
        void Serialize(NSFStats::TSolomonContext& ctx, THashMap<TString, TVector<NBigRT::TAggregatedRow>>& src, THashMap<TString, TVector<NBigRT::TSerializedRow>>& output) const override {
            for (auto& [destinationQueue, aggregatedRows] : src) {
                SerializeSingleQueue(ctx, aggregatedRows, output[destinationQueue]);
            }
        }

        // @brief pack `rows` into framed chunks, one or more per shard, appended to `output`
        // @param ctx     metrics context; the size of every emitted chunk is added to "output_message_size"
        // @param rows    aggregated rows; reordered in place so that rows of one shard are adjacent
        // @param output  receives one TSerializedRow per emitted chunk
        // A chunk is closed when the shard changes, when it already holds MaxChunksCount
        // messages, or when adding the next message would reach MaxMessageSize.
        void SerializeSingleQueue(NSFStats::TSolomonContext& ctx, TVector<NBigRT::TAggregatedRow>& rows, TVector<NBigRT::TSerializedRow>& output) const {
            if (rows.empty()) {
                return;
            }

            // Stable sort keeps the rows of one shard in their incoming order, so the SeqNo
            // attached to each chunk (source offset of its last row) follows the input
            // order instead of an unspecified permutation.
            StableSort(rows, [](const auto& l, const auto& r) -> bool { return l.Shard < r.Shard; });

            ui64 shard{rows.front().Shard};
            TStringStream stream;
            NFraming::TPacker packer(stream);

            // The envelope is reused for every row; Source never changes, so set it once.
            NCrypta::NEvent::TSimpleEventMessage message;
            if (!Source.empty()) {
                message.SetSource(Source);
            }

            // Forwarding reference binds both to a returned reference and to a returned
            // value, so increments are not applied to an accidental copy of the metric.
            auto&& outputMessageSize = ctx.Get<NSFStats::TSumMetric<ui64>>("output_message_size");
            auto currentSourceOffset = rows.front().Meta.SourceOffset;
            size_t chunksCount{0};

            // Moves everything packed so far into a new output row and accounts its size.
            // Does nothing when no data has been packed.
            const auto flushChunk = [&]() {
                packer.Flush();
                if (stream.Str().empty()) {
                    return;
                }
                output.push_back(NBigRT::TSerializedRow {
                    .Shard = shard,
                    .SeqNo = currentSourceOffset,
                    .Data = TString{}
                });
                output.back().Data.swap(stream.Str()); // stream is empty now
                outputMessageSize.Inc(output.back().Data.size());
            };

            for (auto& row : rows) {
                message.SetTimeStamp(row.TimeStamp.TimeT());
                message.SetType(row.MessageType);
                // call serialize w/o initialization check, since the message was already checked to be initialized
                Y_PROTOBUF_SUPPRESS_NODISCARD row.Message->SerializePartialToString(message.MutableBody());

                // ByteSizeLong() also refreshes the cached size that packer.Add() relies on below.
                const size_t messageSize{message.ByteSizeLong()};
                const bool shardChanged = shard != row.Shard;
                // Limits apply only to a non-empty chunk, so a single message exceeding
                // MaxMessageSize is still emitted as a chunk of its own.
                const bool chunkFull = (0 != chunksCount) &&
                    ((chunksCount >= MaxChunksCount) || (stream.Size() + messageSize) >= MaxMessageSize);
                if (shardChanged || chunkFull) {
                    // Flushed with the shard and source offset of the previous row.
                    flushChunk();
                    shard = row.Shard;
                    chunksCount = 0;
                }
                currentSourceOffset = row.Meta.SourceOffset;
                packer.Add(message, true /*use cached size*/);
                ++chunksCount;
            }

            // Emit the trailing chunk; it is accounted in the metric like all the others.
            flushChunk();
        }

    private:
        // @brief parse a single record into zero or more rows appended to `rows`
        // A fresh row carrying `meta` and an empty message of the configured type is handed
        // to the parser; parser exceptions propagate to the caller.
        virtual void ParseRecord(const TStringBuf& record, NBigRT::TRowsBatch& rows, const NBigRT::TRowMeta& meta,
                                 NLogFeller::NChunkSplitter::TRecordContext& context) const {
            NBigRT::TRow row{
                .Meta = meta,
                .MessageType = static_cast<ui64>(MessageType),
                .Message = NCrypta::NEvent::MakeMessage(MessageType),
            };
            RowParser->Parse(record, row, rows, context);
        }

    private:
        const TString Splitter;                              // logfeller splitter name from the config
        const NRtSklejka::IRowParserPtr RowParser;           // parser built for MessageType
        const ui32 ReshardingModule;                         // modulus for shard ids, non-zero
        const NCrypta::NEvent::EMessageType MessageType;     // type of the parsed event messages
        const size_t MaxMessageSize;                         // size limit of one output chunk, in bytes
        const size_t MaxChunksCount;                         // limit of messages per output chunk
        const TString Source;                                // optional source tag set on every output message
        const NBigRT::IShardingAlgorithmPtr ShardingAlgorithm;
        const TString DestinationQueue_;                     // key used for rows in Aggregate()
    };
}

// @brief create the resharder rows processor
// @param config            rows processor configuration
// @param reshardingModule  modulus applied to computed shard ids
// @param destinationQueue  queue key under which aggregated rows are stored
NBigRT::IRowsProcessorPtr NResharder::MakeRowsProcessor(const TRowsProcessorConfig& config,
                                                        const ui32 reshardingModule,
                                                        const TString& destinationQueue) {
    NBigRT::IRowsProcessorPtr processor = NYT::New<TRowsProcessor>(config, reshardingModule, destinationQueue);
    return processor;
}
