#include "regexp_re2.h"

#include <contrib/libs/re2/re2/re2.h>

#include <library/cpp/containers/stack_array/stack_array.h>

#include <util/generic/buffer.h>
#include <util/generic/vector.h>

using namespace NSrvKernel;

// Concrete RE2 option set behind TRegexp::TOpts.
// The constructor installs the option values this wrapper starts from.
class TRegexp::TOpts::TImpl: public re2::RE2::Options {
public:
    TImpl() noexcept {
        // Pattern syntax flags.
        set_posix_syntax(true);
        set_perl_classes(true);
        set_word_boundary(true);
        set_one_line(true);
        set_literal(false);

        // Matching flags.
        set_longest_match(false);
        set_case_sensitive(true);
        set_never_nl(true);

        // Diagnostics flag.
        set_log_errors(false);

        // max_mem is deliberately not set here (no set_max_mem call).
    }
};


// Creates an option set holding a freshly constructed TImpl (see TImpl's
// constructor for the initial option values).
TRegexp::TOpts::TOpts() noexcept
    : Impl_(new TImpl)
{}

// Move construction is defaulted in this translation unit, after TImpl has been defined.
TRegexp::TOpts::TOpts(TRegexp::TOpts&& other) noexcept = default;

// Move assignment is defaulted in this translation unit, after TImpl has been defined.
TRegexp::TOpts& TRegexp::TOpts::operator=(TRegexp::TOpts&& other) noexcept = default;

// The destructor is defaulted in this translation unit, after TImpl has been defined.
TRegexp::TOpts::~TOpts() noexcept = default;

// Reports the `literal` flag currently stored in the underlying RE2 options.
bool TRegexp::TOpts::GetLiteral() const noexcept {
    const bool isLiteral = Impl_->literal();
    return isLiteral;
}

// Stores `value` as the `literal` flag of the underlying RE2 options.
// Returns *this so that setters can be chained.
TRegexp::TOpts& TRegexp::TOpts::SetLiteral(bool value) noexcept {
    Impl_->set_literal(value);
    return *this;
}

// Reports whether matching is case-insensitive, i.e. the negation of the
// `case_sensitive` flag stored in the underlying RE2 options.
bool TRegexp::TOpts::GetCaseInsensitive() const noexcept {
    const bool caseSensitive = Impl_->case_sensitive();
    return !caseSensitive;
}

// Enables (`value == true`) or disables case-insensitive matching by storing
// the negated value in the RE2 `case_sensitive` flag.
// Returns *this so that setters can be chained.
TRegexp::TOpts& TRegexp::TOpts::SetCaseInsensitive(bool value) noexcept {
    const bool caseSensitive = !value;
    Impl_->set_case_sensitive(caseSensitive);
    return *this;
}

// Reports the `posix_syntax` flag stored in the underlying RE2 options.
// SetPosixSyntax() stores its argument in that flag without negation, so the
// getter must return it as-is for a set/get round trip to be consistent.
bool TRegexp::TOpts::GetPosixSyntax() const noexcept {
    return Impl_->posix_syntax();
}

// Stores `value` as the `posix_syntax` flag of the underlying RE2 options.
// Returns *this so that setters can be chained.
TRegexp::TOpts& TRegexp::TOpts::SetPosixSyntax(bool value) noexcept {
    Impl_->set_posix_syntax(value);
    return *this;
}

// Compiled regular expression: an re2::RE2 whose constructor rejects patterns
// that RE2 could not parse.
class TRegexp::TImpl: public re2::RE2 {
public:
    // Compiles `pattern` with `options`. When the base object reports !ok(),
    // Y_ENSURE_EX is given a TRE2Error carrying RE2's error text and the
    // quoted pattern.
    TImpl(TStringBuf pattern, const TOpts::TImpl& options)
        : re2::RE2(re2::StringPiece(pattern.data(), pattern.size()), options)
    {
        Y_ENSURE_EX(
            ok(),
            TRE2Error()
                << "Could not parse RE2, the reason is "
                << error()
                << " ; full pattern: "
                << TString{pattern}.Quote());
    }
};

// Compiles `pattern` using the RE2 options held by `options`.
// TImpl's constructor reports an unparsable pattern through Y_ENSURE_EX with TRE2Error.
TRegexp::TRegexp(TStringBuf pattern, const TOpts& options)
    : Impl_(new TImpl(pattern, *options.Impl_))
{}

// Move construction is defaulted in this translation unit, after TImpl has been defined.
TRegexp::TRegexp(TRegexp&& other) noexcept = default;

// Move assignment is defaulted in this translation unit, after TImpl has been defined.
TRegexp& TRegexp::operator=(TRegexp&& other) noexcept = default;

// Defined in this translation unit, after TImpl has been defined.
TRegexp::~TRegexp() noexcept = default;

// Returns true when the regexp matches the whole of `input`
// (the match is anchored at both ends; no captures are requested).
bool TRegexp::Match(TStringBuf input) const noexcept {
    const re2::StringPiece text(input.Data(), input.Size());
    return Impl_->Match(text, 0, input.Size(), re2::RE2::ANCHOR_BOTH, nullptr, 0);
}

// Performs one match of the regexp against `input` and, on success, appends
// the whole match followed by every capture group to `out`.
//
// `partial` selects an unanchored search; otherwise the match is anchored at
// both ends of `input`. Groups that have no data pointer after the match are
// appended as default-constructed TStringBuf.
//
// Returns false (leaving `out` untouched) when there is no match.
bool TRegexp::Extract(TStringBuf input, TVector<TStringBuf>* out, bool partial) const noexcept {
    const int groupCount = 1 + Impl_->NumberOfCapturingGroups();
    NStackArray::TStackArray<re2::StringPiece> groups(ALLOC_ON_STACK(re2::StringPiece, groupCount));

    const re2::StringPiece text(input.Data(), input.Size());
    const auto anchor = partial ? re2::RE2::UNANCHORED : re2::RE2::ANCHOR_BOTH;

    const bool matched = Impl_->Match(text, 0, input.Size(), anchor, groups.data(), groups.size());
    if (!matched) {
        return false;
    }

    for (const auto& group : groups) {
        if (group.data() == nullptr) {
            out->emplace_back();
            continue;
        }
        out->push_back(TStringBuf(group.data(), group.size()));
    }

    return true;
}

// Collects captures from successive unanchored matches of the regexp in `input`.
//
// For every match the capture groups are appended to `out` in group order; the
// whole-match element (group 0) is included only when `needZeroMatch` is true.
// Groups that did not participate in a match are appended as
// default-constructed TStringBuf.
//
// After a match the search resumes at the end of the last reported capture
// that participated in it, or at the end of the whole match when none did.
// Iteration stops when a match does not move the position forward.
//
// Returns true if at least one match was found.
// Throws yexception when `needZeroMatch` is false and the regexp has no
// capture groups: in that configuration there is nothing to extract.
bool TRegexp::ExtractAll(TStringBuf input, TVector<TStringBuf>* out, bool needZeroMatch) const {
    Y_ASSERT(Impl_);
    Y_ASSERT(out);

    const int captures = 1 + Impl_->NumberOfCapturingGroups();

    // The guard must fire for the "group 0 not wanted and no other groups"
    // case described by the message, not for its opposite.
    if (!needZeroMatch && captures == 1) {
        ythrow yexception() << "no need for zero match but regexp with no match groups at all";
    }

    NStackArray::TStackArray<re2::StringPiece> captured(ALLOC_ON_STACK(re2::StringPiece, captures));

    const re2::StringPiece re2Input(input.Data(), input.Size());
    const size_t endPos = re2Input.size();
    size_t startPos = 0;
    bool found = false;

    while (startPos < endPos && Impl_->Match(re2Input, startPos, endPos, re2::RE2::UNANCHORED, captured.data(), captured.size())) {
        found = true;

        // Group 0 always participates in a successful match, so its end is a
        // safe fallback for the resume position.
        size_t nextPos = captured[0].end() - re2Input.begin();

        bool first = true;
        for (const auto& item : captured) {
            if (first) {
                first = false;
                if (!needZeroMatch) {
                    continue;
                }
            }

            if (item.data()) {
                out->push_back(TStringBuf(item.data(), item.size()));
                // Only participating groups point into the input; computing an
                // offset from a null group would be invalid pointer arithmetic.
                nextPos = item.end() - re2Input.begin();
            } else {
                out->emplace_back();
            }
        }

        if (nextPos <= startPos) {
            // No forward progress (e.g. an empty match): stop instead of looping forever.
            break;
        }
        startPos = nextPos;
    }

    return found;
}

// Stores a copy of `key` in `value`, i.e. the custom spec text is passed through unchanged.
void TRegexp::IRewriteSpec::Custom(TStringBuf key, TString& value) const noexcept {
    value = TString{key};
}

// Replaces matches of the regexp in `input` with an expansion of `pattern`
// and appends the result to `*out`.
//
// `pattern` is copied verbatim except for sequences introduced by `escape`:
//   <escape><escape>  - a single `escape` character;
//   <escape><digit>   - the text of capture group <digit> (0 is the whole match);
//                       empty or non-participating groups expand to nothing;
//   <escape>{...}     - the text from '{' through the matching '}' (braces
//                       included) is passed to spec.Custom() and the returned
//                       value is inserted;
//   <escape><other>   - both characters are dropped.
// A trailing `escape` or an unterminated `{` ends the expansion of the pattern.
//
// With `global` set every match is replaced, otherwise only the first one.
//
// Returns the number of replacements made. When `input` is empty `*out` is
// cleared and 0 is returned; when nothing matches `input` is appended to
// `*out` unchanged and 0 is returned.
size_t TRegexp::Rewrite(TStringBuf pattern, char escape, TStringBuf input,
                        TString* out, bool global, const IRewriteSpec& spec) const noexcept
{
    constexpr size_t MAX_SPEC = 9;

    const auto patternEnd = pattern.end();

    // Find the highest capture index referenced by the pattern so that only
    // that many submatches are requested from RE2.
    size_t maxSpec = 0;

    for (auto i = pattern.begin(); i != patternEnd; ++i) {
        if (*i != escape) {
            continue;
        }
        if (++i == patternEnd) {
            // Trailing escape: stop here, otherwise the loop increment would
            // step past the end of the pattern.
            break;
        }
        if (IsDigit(*i)) {
            maxSpec = Max<size_t>(maxSpec, *i - '0');
        }
    }

    maxSpec = Min<size_t>(maxSpec, MAX_SPEC);

    if (input.Empty()) {
        out->clear();
        return 0;
    }

    re2::StringPiece captured[1 + MAX_SPEC]; // TODO: rewrite to stack array

    const re2::StringPiece string(input.Data(), input.Size());

    auto pos = string.begin();
    // End of the previous replaced match; nullptr never equals a real match
    // position, so the first match is never treated as "empty at last end".
    const char* lastend = nullptr;

    size_t ret = 0;

    TBuffer output;
    output.Reserve(string.size());

    while (pos <= string.end() &&
           Impl_->Match(string, pos - string.begin(), string.size(), re2::RE2::UNANCHORED, captured, 1 + maxSpec))
    {
        // Copy the unmatched text between the current position and the match.
        if (pos < captured[0].begin()) {
            output.Append(pos, captured[0].begin() - pos);
        }

        if (captured[0].begin() == lastend && captured[0].size() == 0) {
            // re2.cc:306 // Disallow empty match at end of last match: skip ahead.
            if (pos != string.end()) {
                output.Append(*pos);
            }
            ++pos;

            continue;
        }

        // Expand the rewrite pattern for this match.
        for (auto i = pattern.begin(); i != patternEnd; ++i) {
            if (*i != escape) {
                output.Append(*i);
                continue;
            }

            if (++i == patternEnd) {
                // Trailing escape: nothing follows it, stop expanding.
                break;
            }

            if (*i == escape) {
                output.Append(escape);
            } else if (IsDigit(*i)) {
                const size_t ncapture = *i - '0';

                if (ncapture <= maxSpec) {
                    const re2::StringPiece& capture = captured[ncapture];

                    if (!capture.empty()) {
                        output.Append(capture.data(), capture.size());
                    }
                }
            } else if (*i == '{') {
                const auto keyBegin = i;

                while (i != patternEnd && *i != '}') {
                    ++i;
                }

                if (i == patternEnd) {
                    // Unterminated custom spec: stop expanding rather than
                    // letting the loop increment run past the end.
                    break;
                }

                TString value;
                spec.Custom(TStringBuf(keyBegin, i + 1), value);
                output.Append(value.Data(), value.Size());
            }
        }

        pos = captured[0].end();
        lastend = pos;

        ++ret;

        if (!global) {
            break;
        }
    }

    if (ret == 0) {
        *out += input;
        return 0;
    }

    // Copy whatever follows the last replaced match.
    if (pos < string.end()) {
        output.Append(pos, string.end() - pos);
    }

    *out += TStringBuf(output.Data(), output.Size());

    return ret;
}
