#include "common.h"
#include "utf8_parser.h"

#include <util/charset/utf8.h>
#include <util/generic/overloaded.h>
#include <util/string/builder.h>

#include <contrib/libs/libxml/include/libxml/parserInternals.h>

#include <sstream>
#include <iomanip>

namespace {
    // Message fragments used by the parser; each one has an English and a Russian variant.
    constexpr TStringBuf unknownCharEn = "found forbidden char with code ";
    constexpr TStringBuf unknownCharRu = "обнаружен запрещённый символ с кодом ";
    constexpr TStringBuf gotBytesEn = "these bytes represent wrong UTF-8 sequence";
    constexpr TStringBuf gotBytesRu = "данные байты представляют неправильную UTF-8 последовательность";

    // Appends `byte` to `stream` as "0x" followed by two upper-case hexadecimal digits.
    // The hex base, the uppercase flag and the '0' fill character remain set on the
    // stream afterwards; the field width applies to the number only.
    void WriteByte(std::stringstream& stream, ui8 byte) {
        stream << "0x";
        stream.setf(std::ios::hex, std::ios::basefield);
        stream.setf(std::ios::uppercase);
        stream.fill('0');
        stream.width(2);
        stream << static_cast<unsigned int>(byte);
    }

    // Builds `message` followed by " 0xHH" for every byte of `bytes` with index
    // 0..position inclusive. `position` must be a valid index into `bytes`.
    TString MakeExpectedError(const std::array<ui8, 4>& bytes, ui8 position, const TStringBuf message) {
        std::stringstream text;
        text << message;
        const size_t shown = static_cast<size_t>(position) + 1;
        for (size_t idx = 0; idx != shown; ++idx) {
            text << " ";
            WriteByte(text, bytes[idx]);
        }
        return text.str();
    }
}

TUtf8Parser::TUtf8Parser() : state_(TState::TStart()) {
}

// Feeds one byte of a UTF-8 stream into the parser.
//
// Returns:
//   * TResult(1) when `symbol` completes a character that passes the libxml
//     IS_CHAR_CH / IS_CHAR check;
//   * TResult(0) when the byte was stored but the multi-byte sequence is not
//     complete yet;
//   * a TErrorMessage (English and Russian text) when the byte cannot start a
//     sequence, the collected bytes do not decode, or the decoded character is
//     rejected by IS_CHAR_CH / IS_CHAR.
//
// Whenever a multi-byte sequence ends, successfully or not, the state is reset
// to TState::TStart so the next byte is treated as a new lead byte.
TUtf8Parser::TResult TUtf8Parser::ProcessChar(ui8 symbol) noexcept {
    return std::visit(TOverloaded {
        // No sequence in progress: `symbol` is interpreted as a lead byte.
        [symbol, this](TState::TStart&) {
            const auto length = UTF8RuneLen(symbol);
            if (length == 0) {
                // UTF8RuneLen does not recognise this byte as the start of a sequence.
                const std::array<ui8, 4> arr = {symbol};
                return TResult(TErrorMessage{
                    .en = MakeExpectedError(arr, 0, gotBytesEn),
                    .ru = MakeExpectedError(arr, 0, gotBytesRu)
                });
            }
            if (length == 1) {
                // Single-byte character: the byte value is the character code.
                if (IS_CHAR_CH(symbol)) {
                    return TResult(int(1));
                }
                return TResult(TErrorMessage{
                    .en = TStringBuilder{} << unknownCharEn << ui64(symbol),
                    .ru = TStringBuilder{} << unknownCharRu << ui64(symbol)
                });
            }
            // Multi-byte character: remember the lead byte and wait for the rest.
            this->state_ = TState::TParsing {
                .parsedChars = {symbol},
                .position = 0,
                .length = static_cast<ui8>(length)
            };
            return TResult(int(0));
        },
        // A multi-byte sequence is being collected.
        [symbol, this](TState::TParsing& state) {
            ++state.position;
            state.parsedChars[state.position] = symbol;
            if (state.position + 1 != state.length) {
                return TResult(int(0));
            }

            // All announced bytes are present: decode them.
            wchar32 rune = BROKEN_RUNE;
            size_t read_count = 0;
            const ui8* const first = state.parsedChars.data();
            const ui8* const last = first + state.parsedChars.size();
            if (SafeReadUTF8Char(rune, read_count, first, last) != RECODE_OK) {
                // `state` refers to the alternative stored inside state_, so the
                // message has to be built before state_ is overwritten.
                TResult malformed(TErrorMessage{
                    .en = MakeExpectedError(state.parsedChars, state.position, gotBytesEn),
                    .ru = MakeExpectedError(state.parsedChars, state.position, gotBytesRu)
                });
                this->state_ = TState::TStart();
                return malformed;
            }

            // `state` must not be touched after this assignment.
            this->state_ = TState::TStart();
            if (IS_CHAR(rune)) {
                return TResult(int(1));
            }
            // Report the decoded code point; the last continuation byte alone
            // says nothing about which character was rejected.
            return TResult(TErrorMessage{
                .en = TStringBuilder{} << unknownCharEn << static_cast<ui64>(rune),
                .ru = TStringBuilder{} << unknownCharRu << static_cast<ui64>(rune)
            });
        }
    }, this->state_);
}

// Reports whether the input ended on a character boundary.
//
// Returns TResult(0) when no multi-byte sequence is in progress. Otherwise
// returns a TErrorMessage listing the bytes collected so far for the
// unfinished sequence. The parser state is left unchanged in both cases.
TUtf8Parser::TResult TUtf8Parser::Finish() noexcept {
    const auto onIdle = [](const TState::TStart&) {
        return TResult(0);
    };
    const auto onUnfinished = [](const TState::TParsing& pending) {
        return TResult(TErrorMessage{
            .en = MakeExpectedError(pending.parsedChars, pending.position, gotBytesEn),
            .ru = MakeExpectedError(pending.parsedChars, pending.position, gotBytesRu)
        });
    };
    return std::visit(TOverloaded{onIdle, onUnfinished}, this->state_);
}
