#include <library/cpp/testing/benchmark/bench.h>

#include <solomon/services/fetcher/lib/yasm/yasm_decoder.h>
#include <solomon/services/fetcher/lib/yasm/encoder.h>

#include <infra/yasm/interfaces/internal/agent.pb.h>

#include <library/cpp/resource/resource.h>

#include <util/datetime/base.h>

using namespace NMonitoring;
using namespace NSolomon::NFetcher::NYasm;
using namespace NSolomon::NFetcher;
using namespace ::NYasm::NInterfaces::NInternal;

/*
Results from solomon-dev-myt-00.search.yandex.net
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

$ ./run.sh

----------- TDevNull ---------------
 samples:       87
 iterations:    4753
 iterations hr:    4.75K
 run time:      5.060093958
 per iteration: 2083544.315 (2.08M) cycles
----------- TSpackIdentity ---------------
 samples:       40
 iterations:    1035
 iterations hr:    1.03K
 run time:      5.121370656
 per iteration: 9821110.34 (9.82M) cycles
----------- TSpackZstd ---------------
 samples:       17
 iterations:    153
 iterations hr:    153
 run time:      5.03851979
 per iteration: 62085332.84 (62.1M) cycles
----------- TSpackLz4 ---------------
 samples:       36
 iterations:    820
 iterations hr:    820
 run time:      5.032914993
 per iteration: 12011753.45 (12M) cycles
*/

// Benchmark input shared by every benchmark in this file.
//
// FIXTURE is a namespace-scope object, so its constructor runs during static
// initialization: it looks up the resource named "response.proto" and parses
// it into Proto. Y_ENSURE fails the construction if the payload cannot be
// parsed as a TAgentResponse.
const struct TFixture {
    // Parsed agent response that the benchmarks feed to the decoder.
    TAgentResponse Proto;

    TFixture() {
        // The serialized payload is only needed for the duration of the parse,
        // so it is passed straight from the resource lookup.
        Y_ENSURE(Proto.ParseFromString(NResource::Find("response.proto")));
    }
} FIXTURE;

// IMultiShardEncoder implementation that discards everything it is given.
// Used by the TDevNull benchmark so that no encoding work is performed.
struct TDevNullEncoder: IMultiShardEncoder {
    // Every value is reported as supported.
    bool SupportedValue(const TValue&) override {
        return true;
    }

    // Shard management: nothing to track.
    void AddShard(TYasmShardKey) override {
    }

    void SwitchShards() override {
    }

    // Labels and values are dropped.
    void WriteLabel(TStringBuf, TStringBuf) override {
    }

    void WriteValue(const ::NYasm::NInterfaces::NInternal::TValue&) override {
    }

    // Nothing was accumulated, so nothing is handed to the consumer.
    void Close(IDataConsumer*) override {
    }
};

// IDataConsumer implementation that throws the shard data away.
struct TDevNullConsumer: IDataConsumer {
    // Both arguments are passed through Y_DO_NOT_OPTIMIZE_AWAY so the compiler
    // cannot treat the produced data as unused; nothing else is done with them.
    void OnShardData(TYasmShardKey key, TString data) override {
        Y_DO_NOT_OPTIMIZE_AWAY(key);
        Y_DO_NOT_OPTIMIZE_AWAY(data);
    }
};

// Baseline: the decoder is wired to TDevNullEncoder, whose methods are no-ops,
// so only decoding is benchmarked.
Y_CPU_BENCHMARK(TDevNull, iface) {
    const auto& perInstanceRecords = FIXTURE.Proto.GetPerInstanceRecords();
    const auto& aggregatedRecords = FIXTURE.Proto.GetAggregatedRecords();

    auto decoder = CreateYasmAgentDecoder("localhost", MakeHolder<TDevNullEncoder>());

    const size_t iterationCount = iface.Iterations();
    for (size_t iteration = 0; iteration < iterationCount; ++iteration) {
        // A fresh consumer is created for every decode pass.
        TDevNullConsumer consumer;
        decoder->Decode(perInstanceRecords, aggregatedRecords, &consumer);
    }
}

// Decoding plus the multi-shard encoder created with ECompression::IDENTITY.
// The encoded output goes to a consumer that discards it.
Y_CPU_BENCHMARK(TSpackIdentity, iface) {
    const auto& perInstanceRecords = FIXTURE.Proto.GetPerInstanceRecords();
    const auto& aggregatedRecords = FIXTURE.Proto.GetAggregatedRecords();

    auto decoder = CreateYasmAgentDecoder(
            "localhost",
            CreateMultiShardEncoder(ECompression::IDENTITY));

    const size_t iterationCount = iface.Iterations();
    for (size_t iteration = 0; iteration < iterationCount; ++iteration) {
        // A fresh consumer is created for every decode pass.
        TDevNullConsumer consumer;
        decoder->Decode(perInstanceRecords, aggregatedRecords, &consumer);
    }
}

// Decoding plus the multi-shard encoder created with ECompression::ZSTD.
// The encoded output goes to a consumer that discards it.
Y_CPU_BENCHMARK(TSpackZstd, iface) {
    const auto& perInstanceRecords = FIXTURE.Proto.GetPerInstanceRecords();
    const auto& aggregatedRecords = FIXTURE.Proto.GetAggregatedRecords();

    auto decoder = CreateYasmAgentDecoder(
            "localhost",
            CreateMultiShardEncoder(ECompression::ZSTD));

    const size_t iterationCount = iface.Iterations();
    for (size_t iteration = 0; iteration < iterationCount; ++iteration) {
        // A fresh consumer is created for every decode pass.
        TDevNullConsumer consumer;
        decoder->Decode(perInstanceRecords, aggregatedRecords, &consumer);
    }
}

// Decoding plus the multi-shard encoder created with ECompression::LZ4.
// The encoded output goes to a consumer that discards it.
Y_CPU_BENCHMARK(TSpackLz4, iface) {
    const auto& perInstanceRecords = FIXTURE.Proto.GetPerInstanceRecords();
    const auto& aggregatedRecords = FIXTURE.Proto.GetAggregatedRecords();

    auto decoder = CreateYasmAgentDecoder(
            "localhost",
            CreateMultiShardEncoder(ECompression::LZ4));

    const size_t iterationCount = iface.Iterations();
    for (size_t iteration = 0; iteration < iterationCount; ++iteration) {
        // A fresh consumer is created for every decode pass.
        TDevNullConsumer consumer;
        decoder->Decode(perInstanceRecords, aggregatedRecords, &consumer);
    }
}
