#include "ampersand_parser.h"
#include "common.h"

#include <array>

#include <library/cpp/containers/comptrie/comptrie_builder.h>

#include <util/generic/strbuf.h>
#include <util/string/builder.h>
#include <util/string/join.h>

#include <contrib/libs/libxml/include/libxml/parserInternals.h>

namespace {
    constexpr std::array<TStringBuf, 5> XML_CODES = {
        TStringBuf("amp;"),
        TStringBuf("lt;"),
        TStringBuf("gt;"),
        TStringBuf("apos;"),
        TStringBuf("quot;"),
    };

    class TXmlCodesError {
    public:
        static TErrorMessage& Get() {
            static TXmlCodesError instance;
            return instance.Message_;
        }
    private:
        TXmlCodesError() {
            TStringBuilder result;
            bool first = true;
            for (const auto& code : XML_CODES) {
                if (first) {
                    first = false;
                } else {
                    result << ", ";
                }
                result << "'&" << code << "'";
            }
            Message_ = TErrorMessage{
                .en = TStringBuilder{} << "Symbols '&', '<', '>' must be escaped according to XML rules, but found no such escaping, you can use one of " << result << ".",
                .ru = TStringBuilder{} << "Символы '&', '<', '>' должны быть экранированы согласно правилам XML, но данного экранирования не обнаружено, вы можете использовать одно из " << result << "."
            };
        }

        TErrorMessage Message_;
    };

    class TXmlCodesTrie {
    public:
        static TCompactTrie<char> GetTrie() {
            static TXmlCodesTrie instance;
            return instance.BaseTrie_;
        }

    private:
        TXmlCodesTrie() {
            TCompactTrieBuilder<char> builder;
            for (size_t i = 0; i < XML_CODES.size(); i++) {
                builder.Add(XML_CODES[i], i);
            }
            builder.Save(BufferOut_);
            BaseTrie_.Init(BufferOut_.Buffer().Data(), BufferOut_.Buffer().Size());
        }

        TCompactTrie<char> BaseTrie_;
        TBufferOutput BufferOut_;
    };

    // Converts a decimal digit character to its numeric value by subtracting '0'.
    // No validation is performed here; callers in this file check the character
    // with IsNumeric() before calling.
    ui64 NumericCharToNumber(char symbol) {
        const auto offsetFromZero = symbol - '0';
        return offsetFromZero;
    }

    // Converts a hexadecimal digit character ('0'-'9' as decided by IsNumeric,
    // 'a'-'f', 'A'-'F') to its numeric value.
    // Any other character triggers Y_FAIL.
    ui64 HexCharToNumber(char symbol) {
        if (IsNumeric(symbol)) {
            return NumericCharToNumber(symbol);
        }

        const bool isLowerHex = 'a' <= symbol && symbol <= 'f';
        if (isLowerHex) {
            return symbol - 'a' + 10;
        }

        const bool isUpperHex = 'A' <= symbol && symbol <= 'F';
        if (isUpperHex) {
            return symbol - 'A' + 10;
        }

        Y_FAIL("Expected hex digit, but got: %c", symbol);
    }

    // Upper bound used when accumulating numeric character references.
    constexpr ui64 MAX_ALLOWED_CHARCODE_IN_XML = 0x10FFFF;

    // Human-readable patterns of the two numeric reference forms; they are
    // embedded into error messages to tell the user which form was being read.
    constexpr TStringBuf decElement = "&#NNNN;";
    constexpr TStringBuf hexElement = "&#xHHHH;";

    // Builds the error reported when a numeric reference was read completely
    // but its code is rejected by the character validity check.
    //   code    - the numeric value that was read;
    //   element - pattern of the reference form (decElement or hexElement).
    TErrorMessage ForbiddenCharError(ui64 code, const TStringBuf element) {
        TErrorMessage message;
        message.en = TStringBuilder{} << "Forbidden char with code " << code << " found in " << element << " element.";
        message.ru = TStringBuilder{} << "Запрещённый символ с кодом " << code << " в элементе вида " << element << ".";
        return message;
    }

    // Builds the error reported when the number inside a numeric reference
    // exceeds MAX_ALLOWED_CHARCODE_IN_XML.
    //   element - pattern of the reference form (decElement or hexElement).
    // The limit itself is an acceptable value, so the message says
    // "not greater than" rather than "smaller than".
    TErrorMessage TooBigNumberError(const TStringBuf element) {
        return TErrorMessage{
            .en = TStringBuilder{} << "Expected number not greater than " << MAX_ALLOWED_CHARCODE_IN_XML << " in " << element << " element.",
            .ru = TStringBuilder{} << "Ожидалось число не больше " << MAX_ALLOWED_CHARCODE_IN_XML << " в элементе вида " << element << "."
        };
    }

    // Builds the error reported when a non-digit character appears inside a
    // numeric reference.
    //   nameEn / nameRu - name of the expected digit kind in English / Russian;
    //   element         - pattern of the reference form (decElement or hexElement).
    TErrorMessage ExpectedDigitError(const TStringBuf nameEn, const TStringBuf nameRu, const TStringBuf element) {
        TErrorMessage message;
        message.en = TStringBuilder{} << "Expected " << nameEn << " symbol in " << element << " element.";
        message.ru = TStringBuilder{} << "Ожидался " << nameRu << " знак в элементе вида " << element << ".";
        return message;
    }
}

// Creates a parser that is outside of any ampersand sequence, with the
// numeric accumulator set to zero.
TAmpersandParser::TAmpersandParser()
    : State_(EState::Start)
    , Number_(0)
{
}

// Feeds one character into the ampersand-sequence state machine.
//
// Outside of a sequence (EState::Start) only '&' is significant: it opens a new
// sequence. Inside a sequence the character must continue either one of the
// named entities from XML_CODES or a numeric reference of the form &#NNNN; /
// &#xHHHH;.
//
// Read_ accumulates the characters of the sequence currently being read,
// starting with '&'; it is cleared when a non-'&' character arrives in the
// Start state.
//
// Returns a default-constructed TResult when the character is accepted and a
// TResult built from a TErrorMessage otherwise. State_ is not reset when an
// error is returned.
TAmpersandParser::TResult TAmpersandParser::ProcessChar(char symbol) noexcept {
    if (this->State_ != EState::Start) {
        this->Read_ += symbol;
    }
    switch (this->State_) {
        case EState::Start: {
            if (symbol == '&') {
                this->State_ = EState::Ampersand;
                this->Read_ = TString{symbol};
            } else {
                this->Read_.clear();
            }
            return TResult();
        }
        case EState::Ampersand: {
            if (symbol == '#') {
                this->State_ = EState::Hash;
            } else {
                // First character of a named entity: descend into the trie of known names.
                TCompactTrie<char> currentTrie;
                if (TXmlCodesTrie::GetTrie().FindTails(symbol, currentTrie)) {
                    this->State_ = EState::Name;
                    this->CurrentTrie = currentTrie;
                } else {
                    return TResult(TXmlCodesError::Get());
                }
            }
            return TResult();
        }
        case EState::Name: {
            // FindTails writes its result into CurrentTrie, so the lookup is
            // performed on a copy of the current sub-trie.
            auto current = this->CurrentTrie;
            if (!current.FindTails(symbol, this->CurrentTrie)) {
                return TResult(TXmlCodesError::Get());
            }
            // Every known name ends with ';', so a matched ';' completes the entity.
            if (symbol == ';') {
                this->State_ = EState::Start;
            }
            return TResult();
        }
        case EState::Hash: {
            if (symbol == 'x') {
                this->State_ = EState::Hex;
                this->Number_ = 0;
            } else if (IsNumeric(symbol)) {
                this->State_ = EState::Number;
                this->Number_ = NumericCharToNumber(symbol);
            } else {
                return TResult(TErrorMessage{
                    .en = "Expected 'x' or number after '&#'.",
                    .ru = "Ожидалось 'x' или число после '&#'."
                });
            }
            return TResult();
        }
        case EState::Hex: {
            if (symbol == ';') {
                // IS_CHAR checks the whole code point range. IS_CHAR_CH is the
                // single-byte variant and accepts every value >= 0x20, which let
                // surrogates, 0xFFFE/0xFFFF and values above 0x10FFFF through.
                if (!IS_CHAR(this->Number_)) {
                    return TResult(ForbiddenCharError(this->Number_, hexElement));
                }
                this->State_ = EState::Start;
            } else if (IsNumeric(symbol) || ('a' <= symbol && symbol <= 'f') || ('A' <= symbol && symbol <= 'F')) {
                // Checking before the multiplication keeps Number_ far from ui64 overflow.
                if (this->Number_ > MAX_ALLOWED_CHARCODE_IN_XML) {
                    return TResult(TooBigNumberError(hexElement));
                }
                this->Number_ = this->Number_ * 16 + HexCharToNumber(symbol);
                // Checking after it reports the digit that actually crosses the limit.
                if (this->Number_ > MAX_ALLOWED_CHARCODE_IN_XML) {
                    return TResult(TooBigNumberError(hexElement));
                }
            } else {
                return TResult(ExpectedDigitError("hexadecimal", "шестнадцатеричный", hexElement));
            }
            return TResult();
        }
        case EState::Number: {
            if (symbol == ';') {
                // See the Hex case: IS_CHAR validates the full code point range.
                if (!IS_CHAR(this->Number_)) {
                    return TResult(ForbiddenCharError(this->Number_, decElement));
                }
                this->State_ = EState::Start;
            } else if (IsNumeric(symbol)) {
                // Checking before the multiplication keeps Number_ far from ui64 overflow.
                if (this->Number_ > MAX_ALLOWED_CHARCODE_IN_XML) {
                    return TResult(TooBigNumberError(decElement));
                }
                this->Number_ = this->Number_ * 10 + NumericCharToNumber(symbol);
                // Checking after it reports the digit that actually crosses the limit.
                if (this->Number_ > MAX_ALLOWED_CHARCODE_IN_XML) {
                    return TResult(TooBigNumberError(decElement));
                }
            } else {
                return TResult(ExpectedDigitError("decimal", "десятичный", decElement));
            }
            return TResult();
        }
    }
    // Every case above returns; reaching this point means State_ holds a value
    // that is not handled by the switch.
    Y_FAIL("Unexpected ampersand parser state");
}

// Signals the end of input.
// Returns an error when the input ended while an ampersand sequence was still
// being read (State_ is not Start), and a default-constructed TResult otherwise.
TAmpersandParser::TResult TAmpersandParser::Finish() noexcept {
    if (this->State_ != EState::Start) {
        return TResult(TErrorMessage{
            .en = "Unexpected end of file while reading ampersand sequence",
            .ru = "Неожиданный конец файла при чтении амперсанд-последовательности"
        });
    }
    return TResult();
}

// Returns a copy of the text accumulated for the ampersand sequence that is
// being read (or was read most recently); the text starts with '&'.
TString TAmpersandParser::GetReadText() const noexcept {
    return Read_;
}
