#include "rengine.h"
#include "spamrule.h"
#include "sphtml.h"
#include "splingv.h"
#include "sptop.h"
#include "sptypes.h"

#include <mail/so/libs/protect/protect.h>

#include <library/cpp/charset/recyr.hh>
#include <library/cpp/iterator/zip.h>

#include <algorithm>
#include <array>

// Byte-to-byte substitution table used by TLingv::CorrectWord().
// Every byte maps to itself except a set of Latin letters, which are remapped
// to other byte values. In the non-WIN32 branch those values match the KOI8-R
// code points of Cyrillic letters (named per line below).
// NOTE(review): the letter names assume the text is KOI8-R (Check() uses CODES_KOI8) — confirm.
static const auto REPLACE_BIND = MakeTrueConst([]() noexcept {
    std::array<ui8, 256> arr{};
    // Start from the identity mapping.
    for (int i = 0; i < 256; i++)
        arr[i] = static_cast<ui8>(i);

    // NOTE(review): the character literals in the WIN32 branch appear to have been
    // damaged by an encoding conversion of this file; their intended values cannot
    // be recovered from this view — confirm against the non-WIN32 branch / history.
#ifdef WIN32
    arr[(ui8)('a')] = (ui8)('�');
    arr[(ui8)('A')] = (ui8)('�');
    arr[(ui8)('b')] = (ui8)('�');
    arr[(ui8)('B')] = (ui8)('�');
    arr[(ui8)('c')] = (ui8)('�');
    arr[(ui8)('C')] = (ui8)('�');
    arr[(ui8)('e')] = (ui8)('�');
    arr[(ui8)('E')] = (ui8)('�');
    arr[(ui8)('H')] = (ui8)('�');
    arr[(ui8)('k')] = (ui8)('�');
    arr[(ui8)('K')] = (ui8)('�');
    arr[(ui8)('m')] = (ui8)('�');
    arr[(ui8)('M')] = (ui8)('�');
    arr[(ui8)('n')] = (ui8)('�');
    arr[(ui8)('o')] = (ui8)('�');
    arr[(ui8)('O')] = (ui8)('�');
    arr[(ui8)('p')] = (ui8)('�');
    arr[(ui8)('P')] = (ui8)('�');
    arr[(ui8)('T')] = (ui8)('�');
    arr[(ui8)('u')] = (ui8)('�');
    arr[(ui8)('x')] = (ui8)('�');
    arr[(ui8)('X')] = (ui8)('�');
    arr[(ui8)('y')] = (ui8)('�');
    arr[(ui8)('Y')] = (ui8)('�');
#else
    arr[(ui8)('a')] = 0xC1; // KOI8-R: Cyrillic small letter A
    arr[(ui8)('A')] = 0xE1; // KOI8-R: Cyrillic capital letter A
    arr[(ui8)('b')] = 0xC2; // KOI8-R: Cyrillic small letter BE
    arr[(ui8)('B')] = 0xE2; // KOI8-R: Cyrillic capital letter BE
    arr[(ui8)('c')] = 0xD3; // KOI8-R: Cyrillic small letter ES
    arr[(ui8)('C')] = 0xF3; // KOI8-R: Cyrillic capital letter ES
    arr[(ui8)('e')] = 0xC5; // KOI8-R: Cyrillic small letter IE
    arr[(ui8)('E')] = 0xE5; // KOI8-R: Cyrillic capital letter IE
    arr[(ui8)('H')] = 0xEE; // KOI8-R: Cyrillic capital letter EN
    arr[(ui8)('k')] = 0xCB; // KOI8-R: Cyrillic small letter KA
    arr[(ui8)('K')] = 0xEB; // KOI8-R: Cyrillic capital letter KA
    arr[(ui8)('m')] = 0xCD; // KOI8-R: Cyrillic small letter EM
    arr[(ui8)('M')] = 0xED; // KOI8-R: Cyrillic capital letter EM
    arr[(ui8)('n')] = 0xD0; // KOI8-R: Cyrillic small letter PE
    arr[(ui8)('o')] = 0xCF; // KOI8-R: Cyrillic small letter O
    arr[(ui8)('O')] = 0xEF; // KOI8-R: Cyrillic capital letter O
    arr[(ui8)('p')] = 0xD2; // KOI8-R: Cyrillic small letter ER
    arr[(ui8)('P')] = 0xF2; // KOI8-R: Cyrillic capital letter ER
    arr[(ui8)('T')] = 0xF4; // KOI8-R: Cyrillic capital letter TE
    arr[(ui8)('u')] = 0xC9; // KOI8-R: Cyrillic small letter I
    arr[(ui8)('x')] = 0xC8; // KOI8-R: Cyrillic small letter HA
    arr[(ui8)('X')] = 0xE8; // KOI8-R: Cyrillic capital letter HA
    arr[(ui8)('y')] = 0xD5; // KOI8-R: Cyrillic small letter U
    arr[(ui8)('Y')] = 0xF5; // KOI8-R: Cyrillic capital letter U
#endif

    return arr;
}());

/**
 * Builds the key lookup tables and the per-key rule lists from the rules holder.
 *
 * Fills:
 *  - lvpcre:       one TLvPcre per rule id returned by GetLvRules();
 *  - m_keys:       key id -> key definition;
 *  - m_rgnodes:    indexed by key id; holds the key text and the rules attached to it;
 *  - m_sortedkeys: key ids ordered by key text;
 *  - m_lets:       per first byte, the start index and length of its run in m_sortedkeys;
 *  - m_big:        [first byte][second byte] flags for byte pairs that can start a key.
 *
 * @param rulesHolder source of the key and rule definitions; a reference to it is kept.
 */
TLingv::TLingv(const TRulesHolder& rulesHolder) :
        m_pRulesHolder(rulesHolder)
        {

    const auto& lvRules = m_pRulesHolder.GetLvRules();
    const auto& lvKeys = m_pRulesHolder.GetLvKeys();

    // NOTE(review): key ids are used directly as indices into m_rgnodes; the "+ 1"
    // presumably allows ids in [0, lvKeys.size()] — confirm against the rules loader.
    m_keys.reserve(lvKeys.size() + 1);
    m_sortedkeys = new int[lvKeys.size() + 1];
    m_rgnodes = new TKeyNode[lvKeys.size() + 1];

    lvpcre.reserve(lvRules.size());

    for (int lvRuleId : lvRules) {
        const auto& rule = std::get<TLvDef>(m_pRulesHolder.RuleById(lvRuleId)->rules);
        lvpcre.emplace_back(TLvPcre{rule.kid, lvRuleId, rule.text.c_str(), rule.to, rule.after, rule.option});
    }

    memset(m_lets, 0, sizeof(m_lets));
    memset(m_big, 0, sizeof(m_big));

    // One entry per key, so reserve by the number of keys (not the number of rules).
    TVector<std::pair<TStringBuf, int>> vkeys(Reserve(lvKeys.size()));

    for (const auto& pkey : lvKeys) {
        vkeys.emplace_back(pkey.key.c_str(), pkey.id);
        m_rgnodes[pkey.id].key = pkey.key;
        m_keys.emplace_noresize(pkey.id, pkey);
    }

    // Order by key text so that keys sharing a first byte form one contiguous run.
    std::sort(vkeys.begin(), vkeys.end(), [](const auto& p1, const auto& p2) {
        return p1.first < p2.first;
    });

    int let_cur = -1;
    int i = 0;
    for (const auto& [key, id] : vkeys) {
        m_sortedkeys[i] = id;

        // Read the first two bytes without indexing past the end of the view:
        // a missing byte is treated as 0, which is what the terminating NUL used to yield.
        const ui8 let = key.empty() ? ui8(0) : static_cast<ui8>(key[0]);
        const ui8 let2 = key.size() > 1 ? static_cast<ui8>(key[1]) : ui8(0);

        if (let != let_cur) {
            // A new first byte starts a new run in m_sortedkeys.
            m_lets[let].first = i;
            m_lets[let].c = 0;
            let_cur = let;
        }
        m_lets[let].c++;

        if (let2)
            m_big[let][let2] = 1;
        else
            memset(m_big[let], 1, 256); // key has no second byte: any following byte is acceptable

        ++i;
    }

    // Attach compensatory rules first so that they precede the ordinary rules of the same key.
    for (const auto& pcre : lvpcre) {
        if (ISLV_COMPENSATORY(pcre.option))
            m_rgnodes[pcre.kid].vrules.emplace_back(pcre);
    }

    for (const auto& pcre : lvpcre) {
        if (!ISLV_COMPENSATORY(pcre.option))
            m_rgnodes[pcre.kid].vrules.emplace_back(pcre);
    }
}

// Frees the two arrays allocated in the constructor.
TLingv::~TLingv() {
    DELETE_ARR(m_rgnodes);
    DELETE_ARR(m_sortedkeys);
}

// Prepares the checker for a new message: clears the scores and stores the
// rules context that CheckKey() reports matches to.
void TLingv::InitMessage(TRulesContext* rulesContext) {
    m_scores = {};
    RulesContext = rulesContext;
}

// Read-only access to the stored scores.
const TLingv::TScores& TLingv::GetScores() const {
    return this->m_scores;
}

/**
 * Applies the key's boundary options to a match located at `it`.
 *
 * @param lvKey key definition (its `option` flags and `key` text are used)
 * @param begin start of the scanned text
 * @param it    position where the key matched
 * @param end   one past the end of the scanned text
 * @return false if LVO_L is set and the byte before the match is a letter, or if
 *         LVO_R is set and the byte right after the match is a letter; true otherwise.
 */
static bool CheckKeyOption(const TLvKey& lvKey, const char* begin, const char* it, const char* end) {
    // Both neighbours are passed as unsigned bytes so that high (non-ASCII) bytes are
    // classified the same way on the left and on the right side of the match.
    if ((lvKey.option & LVO_L) && it > begin && TestHtmlSymbol(static_cast<ui8>(*(it - 1)), SP_LETTER))
        return false;

    const char* key_end = it + lvKey.key.size();
    if ((lvKey.option & LVO_R) && key_end < end && TestHtmlSymbol(static_cast<ui8>(*key_end), SP_LETTER))
        return false;

    return true;
}


void TLingv::Check(const TStringBuf& text) {
    if(text.size() < 3)
        return;

    const auto* cp = CodePageByCharset(CODES_KOI8);

    const auto begin = text.cbegin();
    const auto end = text.cend();
    for (const char* it = begin; it != std::next(end, -2); ++it) {
        const auto byteLet = static_cast<ui8>(*it);
        const auto byteNextLet = static_cast<ui8>(*(it+1));

        if (!m_big[byteLet][cp->ToLower(byteNextLet)])
            continue;

        const size_t restLen = std::distance(it, end);

        if (restLen < 2)
            break;

        int ind = m_lets[byteLet].first;
        for (size_t j = 0; j < m_lets[byteLet].c; j++, ind++) {
            const size_t keyId = m_sortedkeys[ind];
            const auto& lvKey = m_keys.at(keyId);
            const TKeyNode& keyNode = m_rgnodes[keyId];
            if (!AsciiHasPrefixIgnoreCase(TStringBuf{it, end}, keyNode.key))
                continue;
            if (CheckKeyOption(lvKey, begin, it, end))
                CheckKey(keyNode, begin, it, end);
        }
    }
}
// Runs every rule attached to a matched key, in the order stored in keyNode.vrules:
//  - a rule without regex text is set right away, unless it has already worked;
//  - a compensatory rule whose regex matches stops processing of the remaining rules;
//  - any other rule is set when it has not worked yet and its regex matches.
void TLingv::CheckKey(const TKeyNode& keyNode, const char* begin, const char* it, const char* end) {
    const char* const keyEnd = it + keyNode.key.size();

    for (const TLvPcre& rule : keyNode.vrules) {
        const bool hasRegex = rule.reText && *rule.reText;

        if (!hasRegex) {
            if (!RulesContext->IsRuleWorked(rule.rid))
                RulesContext->SetRule(rule.rid);
            continue;
        }

        if (ISLV_COMPENSATORY(rule.option)) {
            if (CheckPcre(rule, begin, it, keyEnd, end))
                return;
            continue;
        }

        if (RulesContext->IsRuleWorked(rule.rid))
            continue;

        if (CheckPcre(rule, begin, it, keyEnd, end))
            RulesContext->SetRule(rule.rid);
    }
}

bool TLingv::CheckPcre(const TLvPcre& ppcre, const char* begin, const char* it, const char* it_end, const char* end) {
    const char * textBeginBeforeKey = Max(begin, it - ppcre.to);
    const char * textEndAfterKey = Min(end, it_end + ppcre.after);

    const TStringBuf textBeforeKey(textBeginBeforeKey, it);
    const TStringBuf textAfterKey(it_end, textEndAfterKey);

    TString full;

    full += textBeforeKey;
    if (ISLV_INSERT(ppcre.option)) {
        full += TStringBuf{it, it_end};
    }
    full += textAfterKey;

    if (full) {
        const auto& rule = *m_pRulesHolder.RuleById(ppcre.rid);
        const auto& lv = std::get<TLvDef>(rule.rules);

        return lv.CheckRegExp(full);
    }

    return false;
}

void TLingv::CorrectWord(char *it, const char *end) {
    for (; it != end; ++it) {
        if (const char replaced = static_cast<char>(REPLACE_BIND[ui8(*it)]); replaced != *it) {
            *it = replaced;
        }
    }
}
