#include "hpack_strings.h"
#include "hpack_huffman_data.h"

#include <util/generic/hash_set.h>
#include <util/string/util.h>
#include <util/string/ascii.h>
#include <util/system/byteorder.h>

#include <cmath>

namespace NSrvKernel::NHTTP2 {
    namespace {
        // Computes how many bytes the Huffman encoding of `decodedStr` occupies
        // when every symbol is encoded with the code length stored in `table`.
        // The bit total is rounded up to a whole number of bytes.
        [[nodiscard]]
        size_t DoGetHuffmanEncodedStringSize(TStringBuf decodedStr, const THuffmanEncodingTable& table) noexcept {
            size_t totalBits = 0;

            for (const auto ch : decodedStr) {
                // The table is indexed by the unsigned byte value of the symbol.
                const auto index = static_cast<ui8>(ch);
                totalBits += table[index].Length;
            }

            // Round up: a partially filled last byte still takes a full byte.
            const size_t roundedUpBits = totalBits + 7;
            return roundedUpBits / 8;
        }
    }

    // Returns the size in bytes of `decodedStr` after Huffman encoding with
    // HUFFMAN_ENCODING_TABLE: the per-symbol code lengths are summed and the
    // total is rounded up to whole bytes.
    size_t GetHuffmanEncodedStringSize(TStringBuf decodedStr) noexcept {
        return DoGetHuffmanEncodedStringSize(decodedStr, HUFFMAN_ENCODING_TABLE);
    }

    // Returns the size in bytes of `decodedStr` after Huffman encoding with
    // HUFFMAN_ENCODING_TABLE_LOWER_CASE (per-symbol code lengths summed, rounded up to bytes).
    // NOTE(review): presumably that table gives upper-case letters the codes of their
    // lower-case forms - confirm against hpack_huffman_data.h.
    size_t GetHuffmanEncodedLowerCaseStringSize(TStringBuf decodedStr) noexcept {
        return DoGetHuffmanEncodedStringSize(decodedStr, HUFFMAN_ENCODING_TABLE_LOWER_CASE);
    }

    // Lower bound on the number of symbols that `encodedSize` encoded bytes can decode to:
    // the bit count divided by HUFFMAN_MAX_LENGTH, rounded down.
    // NOTE(review): HUFFMAN_MAX_LENGTH is presumably the longest code length in bits - confirm.
    size_t GetMinHuffmanDecodedStringSize(size_t encodedSize) noexcept {
        const double encodedBits = encodedSize * 8.0;
        const double symbols = encodedBits / HUFFMAN_MAX_LENGTH;
        return static_cast<size_t>(std::floor(symbols));
    }

    // Upper bound on the number of symbols that `encodedSize` encoded bytes can decode to:
    // the bit count divided by HUFFMAN_MIN_LENGTH, rounded up.
    // NOTE(review): HUFFMAN_MIN_LENGTH is presumably the shortest code length in bits - confirm.
    size_t GetMaxHuffmanDecodedStringSize(size_t encodedSize) noexcept {
        const double encodedBits = encodedSize * 8.0;
        const double symbols = encodedBits / HUFFMAN_MIN_LENGTH;
        return static_cast<size_t>(std::ceil(symbols));
    }

    namespace {
        // The Huffman encoding output may be viewed as one huge number, either big-endian or little-endian.
        // The Huffman code in HPACK is big-endian: the first bit of the first code word is the most significant bit.
        // This affects the way we handle its encoding and decoding.
        //
        // This is how encoding is supposed to work:
        //
        // Suppose we want to encode the string 'bbb'.
        // 'b' has the code 100011. The coding table has its Code 0x23 and Length 6.
        // Suppose the buffer is first empty and is only 16 bit long.
        // Then after writing the first 'b' it becomes
        // (big endian) 10001100,00000000. (6 bits pending)
        // Another 'b' makes it
        // (big endian) 10001110,00110000. (12 bits pending)
        // Yet another 'b' overflows the buffer
        // (big endian) 10001110,00111000.(11)
        // Now we must flush the buffer as a big endian number and refill it with the overflowed part of the code.
        // It then becomes
        // (big endian) 11000000,00000000 (2 bits pending)
        // When the input ends we must flush what's left in the buffer.
        // To do so we must pad the buffer with ones and round up the pending bits to the next byte.
        // Consider our previous example
        // (big endian) 11000000,00000000 (2 bits pending)
        // Becomes
        // (big endian) 11111111,11111111 (8 bits pending)
        // After that we simply copy it byte by byte starting from the most significant.
        //
        // The decoding is implemented as a finite state transducer sequentially applied to half-bytes of the encoded data,
        // the more significant half-byte comes first.

        // Huffman-encodes `input` using `encTable` and writes the result into `output`.
        //
        // Codes are packed most-significant-bit first into a 64-bit accumulator. Whenever the
        // accumulator fills up it is written out as a big-endian 8-byte word. What remains at the
        // end is padded with one-bits up to a byte boundary and written byte by byte.
        // Running out of space in `output` is checked with Y_VERIFY.
        // An empty `input` leaves `output` untouched.
        void DoHuffmanEncodeString(TStringBuf input, TOutputRegion& output,
                                   const THuffmanEncodingTable& encTable) noexcept
        {
            if (Y_UNLIKELY(input.empty())) {
                return;
            }

            constexpr ui32 bufCapacity = sizeof(ui64) * 8;

            ui64 buffer = 0;  // Bit accumulator, filled from the most significant bit downwards.
            ui32 pending = 0; // Number of meaningful bits currently held at the top of `buffer`.

            auto outputIter = output.begin();
            const auto outputEnd = output.end();

            for (const auto symbol : input) {
                const auto entry = encTable[static_cast<ui8>(symbol)];
                const ui64 bits = entry.Code;
                pending += entry.Length;

                if (Y_LIKELY(pending < bufCapacity)) {
                    // The whole code fits: place it right below the bits already stored.
                    buffer |= bits << (bufCapacity - pending);
                    continue;
                }

                // The accumulator is full, so a whole word is about to be written out;
                // the output must have room for it.
                Y_VERIFY((outputIter + sizeof(buffer)) <= outputEnd);

                // `pending` now counts the bits of this code that did not fit.
                // The upper bits of the code complete the accumulator.
                pending -= bufCapacity;
                buffer |= bits >> pending;

                // Emit the full accumulator in network (big-endian) byte order.
                const auto wire = HostToInet(buffer);
                memcpy(outputIter, &wire, sizeof(wire));
                outputIter += sizeof(wire);

                // Restart the accumulator with the spilled lower bits of the code, if any.
                buffer = (pending != 0) ? (bits << (bufCapacity - pending)) : 0;
            }

            // Write out whatever is still buffered.
            if (Y_LIKELY(pending)) {
                // Pad the unused low bits with ones, as the comment above this function requires.
                buffer |= ~ui64(0) >> pending;
                // Round the bit count up to a whole number of bytes and right-align those bytes.
                pending = (pending + 7u) / 8u * 8u;
                buffer >>= bufCapacity - pending;

                // The output must have room for the remaining bytes.
                Y_VERIFY((outputIter + pending / 8) <= outputEnd);

                // Most significant byte first.
                while (pending != 0) {
                    pending -= 8;
                    *outputIter++ = static_cast<ui8>(buffer >> pending);
                }
            }

            output.Consume(outputIter - output.begin());
        }

        // Compile-time switch for DoCopyString: Yes converts ASCII 'A'-'Z' to lower case
        // after copying, No leaves the copied bytes as they are.
        enum class ELowerCase { No, Yes };

        // Copies `input` into `output`. With LowerCase == ELowerCase::Yes the copied bytes
        // in the range 'A'..'Z' are converted to lower case in place.
        // The output must have at least input.size() bytes available (checked with Y_VERIFY).
        template <ELowerCase LowerCase>
        void DoCopyString(TStringBuf input, TOutputRegion& output) noexcept {
            Y_VERIFY(input.size() <= output.SizeAvailable());

            // Remember where the copy lands before the copy call is made.
            ui8* const copied = output.data();
            output.CopyUnsafe(input);

            if constexpr (LowerCase == ELowerCase::Yes) {
                ui8* const copiedEnd = copied + input.size();
                for (ui8* it = copied; it != copiedEnd; ++it) {
                    const ui8 ch = *it;
                    if (ch >= 'A' && ch <= 'Z') {
                        *it = static_cast<ui8>(ch - 'A' + 'a');
                    }
                }
            }
        }
    }

    // Huffman-encodes `input` into `output` using HUFFMAN_ENCODING_TABLE.
    // The output region must be large enough for the encoded bytes;
    // DoHuffmanEncodeString enforces this with Y_VERIFY.
    void HuffmanEncodeString(TStringBuf input, TOutputRegion& output) noexcept {
        DoHuffmanEncodeString(input, output, HUFFMAN_ENCODING_TABLE);
    }

    // Huffman-encodes `input` into `output` using HUFFMAN_ENCODING_TABLE_LOWER_CASE.
    // NOTE(review): presumably that table encodes upper-case letters as their lower-case
    // forms - confirm against hpack_huffman_data.h.
    // The output region must be large enough; DoHuffmanEncodeString enforces this with Y_VERIFY.
    void HuffmanEncodeStringLowerCase(TStringBuf input, TOutputRegion& output) noexcept {
        DoHuffmanEncodeString(input, output, HUFFMAN_ENCODING_TABLE_LOWER_CASE);
    }

    // Decodes the Huffman-coded `input` into `output` by running HUFFMAN_DECODING_FST over
    // the input four bits at a time, high half-byte first.
    //
    // Returns:
    //  - CompressionError(InvalidHuffman) if a transition does not change the state
    //    (NOTE(review): presumably how the table marks invalid input - confirm against
    //    hpack_huffman_data.h), or if the last transition taken is not marked final;
    //  - TConnectionError(PROTOCOL_ERROR, TooBigHeaders) if `output` has no room for a symbol;
    //  - an empty TError on success, after consuming the written bytes from `output`.
    TError HuffmanDecodeString(TStringBuf input, TOutputRegion& output) noexcept {
        auto outputIter = output.begin();
        const auto outputEnd = output.end();

        ui8 state = 0;
        // Stays true for empty input; otherwise mirrors IsFinal of the last transition.
        bool maybeFinal = true;

        for (const auto symbol : input) {
            const ui8 octet = static_cast<ui8>(symbol);

            // The huffman in hpack is big endian: shift 4 selects the high half-byte, shift 0 the low one.
            for (const int shift : {4, 0}) {
                const ui8 nibble = (octet >> shift) & 0xf;
                const auto transition = HUFFMAN_DECODING_FST[state][nibble];

                Y_REQUIRE(transition.NextState != state,
                    CompressionError(ECompressionError::InvalidHuffman));

                if (transition.IsEmitting) {
                    Y_REQUIRE(outputIter != outputEnd,
                        TConnectionError(EErrorCode::PROTOCOL_ERROR, EConnProtocolError::TooBigHeaders));
                    *outputIter++ = transition.Symbol;
                }

                maybeFinal = transition.IsFinal;
                state = transition.NextState;
            }
        }

        Y_REQUIRE(maybeFinal,
            CompressionError(ECompressionError::InvalidHuffman));

        output.Consume(outputIter - output.begin());

        return {};
    }

    // Copies `input` into `output`, converting ASCII 'A'-'Z' to lower case.
    // The output must have at least input.size() bytes available (DoCopyString checks this with Y_VERIFY).
    void CopyStringLowerCase(TStringBuf input, TOutputRegion& output) noexcept {
        DoCopyString<ELowerCase::Yes>(input, output);
    }

    // Copies `input` into `output` without changing the bytes.
    // The output must have at least input.size() bytes available (DoCopyString checks this with Y_VERIFY).
    void CopyString(TStringBuf input, TOutputRegion& output) noexcept {
        DoCopyString<ELowerCase::No>(input, output);
    }
}
