#include "string_pool.h"

#include <util/generic/size_literals.h>

#include <contrib/libs/lz4/lz4.h>

#include <limits>

using namespace yandex::solomon::common;

namespace NSolomon::NStringPool {
namespace {

/**
 * Empty strings will always have this ID.
 */
constexpr ui32 EmptyStrId = 0;

/**
 * Upper bound on the raw (uncompressed) pool size in bytes.
 *
 * Checked when a string is added to a builder and when the original size
 * declared by a serialized pool is validated.
 */
constexpr size_t POOL_MAX_SIZE = 256_MB;

void InitIndex(const TBuffer& buf, TVector<TStringBuf>* index) {
    // init index
    const char* begin = buf.data();
    const char* end = begin + buf.size();
    while (begin != end) {
        auto* p = static_cast<const char*>(std::memchr(begin, '\0', static_cast<size_t>(end - begin)));
        TStringBuf str{begin, p};
        // TODO: remove this temporary hack after fixes from https://a.yandex-team.ru/review/1820040/details
        //       will be deployed to all DataProxy clients in production
        if (!str.empty()) {
            index->push_back(str);
        }
        begin = p + 1;
    }
}

/**
 * Non-owning (pointer, size) pair describing a block of memory.
 *
 * Converts implicitly from any object exposing data() and size(), such as
 * TString, TBuffer or TStringBuf. Constness of the source is cast away, so
 * it is up to the user to write only into blocks that are really writable.
 */
struct TBlock {
    char* Data;
    size_t Size;

    template <typename TContainer>
    TBlock(TContainer&& container)
        : Data(const_cast<char*>(container.data()))
        , Size(container.size())
    {
    }
};

/**
 * LZ4 compression codec (https://github.com/lz4/lz4)
 */
struct TLz4Codec {
    static size_t MaxCompressedLength(size_t in) {
        int rc = LZ4_compressBound(static_cast<int>(in));
        Y_ENSURE(rc > 0, "lz4 input size is too large");
        return static_cast<size_t>(rc);
    }

    static size_t Compress(TBlock in, TBlock out) {
        int rc = LZ4_compress_default(
            in.Data,
            out.Data,
            static_cast<int>(in.Size),
            static_cast<int>(out.Size));
        Y_ENSURE(rc > 0, "lz4 compression failed");
        return static_cast<size_t>(rc);
    }

    static void Decompress(TBlock in, TBlock out) {
        int rc = LZ4_decompress_safe(
            in.Data,
            out.Data,
            static_cast<int>(in.Size),
            static_cast<int>(out.Size));
        Y_ENSURE(rc >= 0, "lz4 stream is detected malformed");
    }
};

/**
 * Compresses the buffer with the given codec and stores the result in out.
 *
 * The string is first grown to the worst-case compressed size reported by
 * the codec and then cut down to the number of bytes actually produced.
 */
template <typename TCodec>
void Compress(const TBuffer& data, TString* out) {
    const size_t capacity = TCodec::MaxCompressedLength(data.size());
    out->ReserveAndResize(capacity);

    const size_t written = TCodec::Compress(data, *out);
    out->resize(written);
}

/**
 * Fills the protobuf message from raw pool bytes.
 *
 * The compression type and the original (uncompressed) size are always
 * recorded; the strings field receives either a plain copy of the buffer
 * or its LZ4-compressed form. Any other compression type is rejected with
 * an exception.
 */
void CompressPool(const TBuffer& buf, StringPool::Compression c, StringPool* pool) {
    pool->set_compression(c);
    pool->set_original_size(static_cast<ui32>(buf.size()));

    if (c == StringPool_Compression_COMPRESSION_UNSPECIFIED) {
        // no compression requested: store bytes as is
        pool->mutable_strings()->AssignNoAlias(buf.Data(), buf.Size());
        return;
    }

    if (c == StringPool_Compression_LZ4) {
        Compress<TLz4Codec>(buf, pool->mutable_strings());
        return;
    }

    ythrow yexception() << "unsupported compression type: " << c;
}

template <typename TCodec>
void Decompress(const TString& data, size_t originalSize, TBuffer* out) {
    out->Resize(originalSize);
    TCodec::Decompress(data, *out);
}

} // namespace

/**
 * Creates a pool that contains only the empty string.
 */
TStringPool::TStringPool() {
    // index 0 is always the empty string
    Index_.push_back(TStringBuf{});
}

/**
 * Restores a pool from its protobuf representation.
 *
 * The declared original size must not exceed POOL_MAX_SIZE. Depending on the
 * compression type the strings field is either copied as is or decompressed
 * with LZ4; any other compression type is rejected with an exception.
 * The index is then rebuilt from the resulting buffer.
 */
TStringPool::TStringPool(const StringPool& pool) {
    // sanity check
    const auto declaredSize = static_cast<size_t>(pool.original_size());
    Y_ENSURE(declaredSize <= POOL_MAX_SIZE, "too huge string pool original size: " << declaredSize << " bytes");

    const auto compression = pool.compression();
    if (compression == StringPool_Compression_COMPRESSION_UNSPECIFIED) {
        const auto& raw = pool.strings();
        Buf_.Append(raw.data(), raw.size());
    } else if (compression == StringPool_Compression_LZ4) {
        Decompress<TLz4Codec>(pool.strings(), declaredSize, &Buf_);
    } else {
        ythrow yexception() << "unsupported compression type: " << compression;
    }

    // index 0 is always the empty string
    Index_.push_back(TStringBuf{});
    InitIndex(Buf_, &Index_);
}

/**
 * Creates a pool on top of raw pool bytes, taking ownership of the buffer.
 * The index is built from the strings found in the buffer.
 */
TStringPool::TStringPool(TBuffer buf)
    : Buf_(std::move(buf))
{
    // index 0 is always the empty string
    Index_.push_back(TStringBuf{});
    InitIndex(Buf_, &Index_);
}

/**
 * Creates an independent deep copy of the pool.
 *
 * The bytes are duplicated into a new buffer and every index entry is
 * re-based so that it points into the copy instead of this pool.
 */
TStringPool TStringPool::Copy() const {
    TStringPool copy;
    // Assign() copies the bytes in one call, the same way TStringPoolBuilder::Copy()
    // does, and avoids handing memcpy() the possibly-null data() pointers of an empty pool
    copy.Buf_.Assign(Buf_.Data(), Buf_.Size());
    copy.Index_.reserve(Index_.size());

    const char* srcBase = Buf_.data();
    const char* dstBase = copy.Buf_.data();

    // empty TStringBuf was inserted in default TStringPool constructor
    for (size_t i = 1; i < Index_.size(); ++i) {
        const TStringBuf& src = Index_[i];
        // keep the same offset and length, but relative to the new buffer
        copy.Index_.emplace_back(dstBase + (src.data() - srcBase), src.size());
    }

    return copy;
}

/**
 * Serializes this pool into a new protobuf message using the requested compression.
 */
StringPool TStringPool::ToProto(StringPool::Compression c) const {
    StringPool result;
    ToProto(c, &result);
    return result;
}

/**
 * Writes this pool into the given protobuf message using the requested compression.
 *
 * @param c     compression to apply to the raw pool buffer
 * @param pool  message to fill; its compression, original_size and strings
 *              fields are set by CompressPool()
 */
void TStringPool::ToProto(StringPool::Compression c, StringPool* pool) const {
    CompressPool(Buf_, c, pool);
}

/**
 * Creates an empty builder.
 *
 * @param capacityBytes  passed to the TBuffer constructor; presumably the
 *                       initial capacity of the pool buffer (TODO confirm)
 */
TStringPoolBuilder::TStringPoolBuilder(size_t capacityBytes)
    : Buf_(std::make_unique<TBuffer>(capacityBytes))
    // hash and equality functors receive a pointer to the buffer, presumably to
    // resolve TStringRef keys against its bytes; this relies on Buf_ being
    // initialized before Index_
    , Index_(16, TStringRefHash{Buf_.get()}, TStringRefEq{Buf_.get()})
{
    // TODO: remove this temporary hack after fixes from https://a.yandex-team.ru/review/1820040/details
    //       will be deployed to all DataProxy clients in production
    // The buffer starts with a single zero byte, i.e. a leading empty string;
    // InitIndex() skips empty strings when such a buffer is indexed.
    Buf_->Append('\0');
}

/**
 * Adds a string to the pool and returns its ID.
 *
 * An empty string is never stored and always maps to EmptyStrId. A string
 * that is already in the pool yields its existing ID. Otherwise the string
 * is appended to the buffer followed by a zero byte and gets the next ID.
 *
 * Fails via Y_ENSURE if the buffer would grow beyond POOL_MAX_SIZE.
 */
ui32 TStringPoolBuilder::Put(TStringBuf value) {
    // do not put an empty string in the pool
    if (value.empty()) {
        return EmptyStrId;
    }

    const auto known = Index_.find(value);
    if (known != Index_.end()) {
        return known->second;
    }

    Y_ENSURE(Buf_->size() + value.size() + 1 <= POOL_MAX_SIZE,
            "pool max size exceeded (buf: " << Buf_->size() << ", value: " << value.size() << ')');

    // position of the new string must be taken before the buffer grows
    const ui32 offset = static_cast<ui32>(Buf_->Size());
    const ui32 length = static_cast<ui32>(value.size());
    // IDs of stored strings start from 1
    const ui32 id = static_cast<ui32>(Index_.size()) + 1;

    Buf_->Append(value.data(), value.size());
    Buf_->Append('\0');

    // register the string only after its bytes are in the buffer
    const TStringRef ref{offset, length};
    Index_[ref] = id;
    return id;
}

/**
 * Creates an independent copy of the builder: the buffer content is
 * duplicated and every (string reference, ID) pair is re-inserted into the
 * index of the copy.
 */
TStringPoolBuilder TStringPoolBuilder::Copy() const {
    TStringPoolBuilder result;
    // bytes go first: the index of the copy is filled only after its buffer is ready
    result.Buf_->Assign(Buf_->Data(), Buf_->Size());

    auto& index = result.Index_;
    index.reserve(Index_.size());
    for (const auto& entry: Index_) {
        index.emplace(entry.first, entry.second);
    }
    return result;
}

/**
 * Turns the accumulated strings into a TStringPool.
 *
 * The index is cleared and the buffer is moved out of the builder into the
 * resulting pool.
 */
TStringPool TStringPoolBuilder::Build() {
    Index_.clear();

    TBuffer& raw = *Buf_;
    raw.ShrinkToFit();
    return TStringPool{std::move(raw)};
}

/**
 * Serializes the accumulated strings into a protobuf message using the
 * requested compression. The builder itself is left untouched.
 */
StringPool TStringPoolBuilder::Build(StringPool::Compression c) const {
    StringPool result;
    const TBuffer& raw = *Buf_;
    CompressPool(raw, c, &result);
    return result;
}

} // namespace NSolomon::NStringPool

/**
 * Stream output for the compression enum: prints the name reported by
 * StringPool::Compression_Name().
 */
template <>
void Out<StringPool_Compression>(IOutputStream& out, StringPool_Compression c) {
    const auto& name = StringPool::Compression_Name(c);
    out << name;
}
