#include "sptypes.h"
#include "rengine.h"
#include "spbody.h"

#include <mail/so/spamstop/tools/so-common/safe_recode.h>

#include <mail/so/libs/protect/protect.h>

#include <util/folder/dirut.h>
#include <util/string/subst.h>

// Named PCRE patterns used by the TSpBodyPart checks in this file. Each entry is
// {name, pattern, flag}; the patterns are looked up by name through m_pcre.
// NOTE(review): the third field is `true` for every entry; its meaning is defined
// by TPcreUnit, which is not visible here.
static const auto BodyPartRe = MakeTrueConstArray(
    // Quoted-printable checks. "qp_long" is used by CheckBodyPart for parts whose
    // transfer encoding mentions quoted-printable; "qp_illegal" is not referenced
    // by the code visible in this file.
    TPcreUnit{"qp_illegal", "/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f-\\xff]/s", true},
    TPcreUnit{"qp_long", "/.{77}/m", true},

    // Body prefixes checked by CheckBodyPart to raise the fmicrosoft_executable flag.
    TPcreUnit{"microsoft_exec_1", "/^TVqQAAMAAAAEAAAA/", true},
    TPcreUnit{"microsoft_exec_2", "/^begin [0-7]{3}.{0,5}M35J0``,````\\$````/s", true},

    // Used by CheckBodyPart to clear fmime_html_no_charset when the body itself
    // contains a charset declaration.
    TPcreUnit{"html_charset", "/charset\\s=\\s/is", true},

    // Used by CheckMimeHeader: "n_name" captures the text after the last dot of the
    // attachment name; the n_* patterns classify that extension and the t_* patterns
    // classify the (lower-cased, normalized) content type.
    TPcreUnit{"n_name", "/.*\\.(.+)/", true},
    TPcreUnit{"n_text", "/^(?:txt|[px]?html?|xml)$/", true},
    TPcreUnit{"t_text", "/^text\\/(?:plain|[px]?html?|english|sgml|xml|enriched|richtext)/m", true},
    TPcreUnit{"t_message", "/^message\\/external-body/m", true},

    TPcreUnit{"n_image", "/^(?:jpe?g|tiff?|gif|png)$/", true},
    TPcreUnit{"t_image", "/^(?:image\\/|application\\/mac-binhex)/m", true},
    TPcreUnit{"t_vcf", "/^text\\/vcard/m", true},
    TPcreUnit{"n_com", "/^(?:bat|com|exe|pif|scr|swf|vbs)$/", true},
    TPcreUnit{"t_com", "/^application\\//m", true},
    TPcreUnit{"t_doc", "/^application\\/.*word$/m", true},
    TPcreUnit{"t_ppt", "/^application\\/.*(?:powerpoint|ppt)$/m", true},
    TPcreUnit{"t_xls", "/^application\\/.*excel$/m", true},
    // Used by CheckBodyPart: a tag containing an http(s) URL followed by visible text
    // that is itself a URL whose host does not start with the host captured from the tag.
    // NOTE(review): the character class after "htt" contains a non-ASCII byte,
    // presumably a look-alike of 'p' in a single-byte encoding - confirm the file
    // encoding before editing this line.
    TPcreUnit{"a_href_mismatch", "/<[^>]{0,40}https?:\\/\\/(?:[^\\/@\\s>]@)?([^\\/\"'\\s?>]+)[^>]*>\\s*htt[p�]s?:\\/\\/(?!\\1)([^\\/\"'\\s?>]+)/", true}
);

// Matcher built from BodyPartRe with default-constructed NRegexp settings; the
// methods below address its patterns by name.
static const TTrueConst<PcreTool> m_pcre{NRegexp::TSettings{}, MakeArrayRef(*BodyPartRe)};

/// Publishes the flags accumulated in m_cur as rules on the engine's current rules context.
///
/// @param m_prengine  Engine whose m_cur->rulesContext receives the rules. It is only
///                    dereferenced when at least one flag is set.
void TSpBodyPart::CheckMessage(TRengine* m_prengine) {
    // Forwards the rule name unchanged; the engine is touched only when a rule fires.
    const auto raiseRule = [m_prengine](const auto& ruleName) {
        m_prengine->m_cur->rulesContext.SetRule(ruleName);
    };

    if (m_cur.fmime_base64_encoded_text) {
        raiseRule("BASE64_ENC_TEXT");
    }
    if (m_cur.fmime_html_no_charset) {
        raiseRule("MIME_HTML_NO_CHARSET");
    }
    if (m_cur.fmime_suspect_name) {
        raiseRule("MIME_SUSPECT_NAME");
    }

    // HTML parts were seen but no text (or message) part was counted.
    const bool htmlOnly = m_cur.mime_body_html_count > 0 && m_cur.mime_body_text_count == 0;
    if (htmlOnly) {
        raiseRule("__MIME_HTML_ONLY");
    }

    if (m_cur.fmicrosoft_executable) {
        raiseRule("MICROSOFT_EXECUTABLE");
    }
    if (m_cur.fmime_long_line_qp) {
        raiseRule("MIME_LONG_LINE_QP");
    }
}

/// Collapses, in place, every run of characters classified as SP_SPACE by
/// TestHtmlSymbol into a single ' ' and re-terminates the string.
///
/// @param szField  NUL-terminated, writable buffer; must not be null.
/// @return         Length of the resulting string (without the terminator).
int TSpBodyPart::DelRepeatedSpaces(char* szField) {
    int written = 0;
    bool insideSpaceRun = false;

    // The write index never overtakes the read pointer, so compacting in place is safe.
    for (const char* src = szField; *src != 0; ++src) {
        const bool isSpace = TestHtmlSymbol(*src, SP_SPACE);
        if (!isSpace) {
            szField[written++] = *src;
        } else if (!insideSpaceRun) {
            // First space-like character of a run is normalized to a plain blank.
            szField[written++] = ' ';
        }
        insideSpaceRun = isSpace;
    }

    szField[written] = 0;
    return written;
}

/// Normalizes a content type in place by replacing every "/x-" and every "/vnd."
/// with a plain "/".
///
/// @param ctype  Content type string to rewrite.
void TSpBodyPart::PrepareContextType(TString& ctype) {
    // Order matters: "/x-vnd.foo" first becomes "/vnd.foo" and only then "/foo".
    const TStringBuf strippedMarkers[] = {"/x-", "/vnd."};

    for (const TStringBuf marker : strippedMarkers) {
        SubstGlobal(ctype, marker, "/");
    }
}

/// Inspects the MIME headers of one body part and updates the accumulated flags
/// and counters in m_cur.
///
/// Sets/updates: fmime_base64_encoded_text, fTextHtml, fTextPlain,
/// mime_body_html_count, mime_body_text_count, fmime_html_no_charset and
/// fmime_suspect_name.
///
/// @param prop  Properties of the part; pcd, pctype, pcharset, fBase64, pname and
///              pfilename are read. Must not be null; the string members may be null.
void TSpBodyPart::CheckMimeHeader(TBodyPartProperty* prop) {
    // Prefix test whose length always comes from the prefix itself, so a literal and
    // its hand-written length cannot drift apart (the previous code compared only the
    // first 5 characters of the 6-character "/plain").
    const auto startsWith = [](const char* str, const char* prefix) {
        return strncmp(str, prefix, strlen(prefix)) == 0;
    };

    const char* pcd = prop->pcd;
    const char* pctype = prop->pctype;

    // True when the part carries an "attachment" or "inline" content disposition.
    const bool fcd = pcd && (startsWith(pcd, "attachment") || startsWith(pcd, "inline"));

    if (pctype) {
        if (startsWith(pctype, "text")) {
            if (prop->fBase64)
                m_cur.fmime_base64_encoded_text = true;

            // Safe: the "text" prefix matched, so at least 4 characters precede the terminator.
            const char* subtype = pctype + 4;

            if (startsWith(subtype, "/html")) {
                m_cur.fTextHtml = true;
                m_cur.mime_body_html_count++;

                // A declared charset leaves the flag untouched; a missing one is only
                // reported for parts without an attachment/inline disposition.
                const bool hasCharset = prop->pcharset && *prop->pcharset;
                if (!hasCharset && !fcd)
                    m_cur.fmime_html_no_charset = true;
            } else if (startsWith(subtype, "/plain")) {
                m_cur.fTextPlain = true;
                m_cur.mime_body_text_count++;
            }
        } else if (startsWith(pctype, "message")) {
            // message/* parts are counted together with text parts.
            m_cur.mime_body_text_count++;
        }
    }

    // Prefer the "name" parameter; fall back to "filename" when it is null or empty.
    const char* propname = (prop->pname && *prop->pname) ? prop->pname : prop->pfilename;

    // application/octet-stream is exempt from the extension/type consistency check.
    if (!m_cur.fmime_suspect_name && propname && pctype && !startsWith(pctype, "application/octet-stream")) {
        // MIME_SUSPECT_NAME triggered here
        TStringBuf Pattern;
        if (!(Pattern = m_pcre->GetPattern("n_name", TStringBuf(propname), 1)))
            return; // no extension captured - nothing to compare against the type

        //$ctype =~ s@/(x-|vnd\.)@/@;
        TString ctype = to_lower(TString(pctype));
        const TString name = to_lower(TString{Pattern});
        PrepareContextType(ctype);

        // Each branch pairs an extension class with the content types it may appear
        // under; the first matching extension class decides.
        if (m_pcre->Check("n_text", name) &&
            !m_pcre->Check("t_text", ctype) && !m_pcre->Check("t_message", ctype))
            m_cur.fmime_suspect_name = true;

        else if (m_pcre->Check("n_image", name) && !m_pcre->Check("t_image", ctype))
            m_cur.fmime_suspect_name = true;

        else if (name == "vcf" && !m_pcre->Check("t_vcf", ctype))
            m_cur.fmime_suspect_name = true;

        else if (m_pcre->Check("n_com", name) && !m_pcre->Check("t_com", ctype))
            m_cur.fmime_suspect_name = true;

        else if ((name == "doc") && !m_pcre->Check("t_doc", ctype))
            m_cur.fmime_suspect_name = true;

        else if ((name == "ppt") && !m_pcre->Check("t_ppt", ctype))
            m_cur.fmime_suspect_name = true;

        else if ((name == "xls") && !m_pcre->Check("t_xls", ctype))
            m_cur.fmime_suspect_name = true;
    }
}

/// Runs all per-part checks: MIME header analysis, engine field checks for the
/// part's headers, body regex checks, and attachment/archive handling.
///
/// @param m_prengine  Engine that receives field checks, statistics and patterns.
/// @param pBody       Part body; may be null, in which case the body checks are skipped.
/// @param Len         Number of bytes available at pBody; the body checks are skipped
///                    unless it is positive.
/// @param prop        Properties of the part; must not be null.
void TSpBodyPart::CheckBodyPart(TRengine* m_prengine, const char* pBody, int Len, TBodyPartProperty* prop) {
    CheckMimeHeader(prop);

    m_prengine->CheckField(FD_BODY_CONTENT_TYPE, prop->pctype);
    m_prengine->CheckField(FD_BODY_CONTENT_DISPOSITION, prop->pcd);
    m_prengine->CheckField(FD_BODY_CONTENT_CHARSET, prop->pcharset);
    m_prengine->CheckField(FD_BODY_CONTENT_TRANSFER_ENCODING, prop->pcte);
    m_prengine->CheckField(FD_BODY_ATTACHMENT_NAME, prop->pname);
    m_prengine->CheckField(FD_BODY_ATTACHMENT_NAME, prop->pfilename);

    if (prop->pct_raw && *(prop->pct_raw)) {
        // Work on a private copy: DelRepeatedSpaces rewrites the buffer in place.
        char* p_raw = nullptr;
        STRDUP(&p_raw, prop->pct_raw, strlen(prop->pct_raw));
        DelRepeatedSpaces(p_raw);
        m_prengine->CheckField(FD_BODY_CONTENT_TYPE_RAW, p_raw);
        delete[] p_raw;
    }

    // Len is signed: require a strictly positive value so that a negative length can
    // never be converted into a huge size_t and make the regexes read out of bounds.
    if (!prop->fBase64 && pBody && Len > 0) {
        const TStringBuf body{pBody, size_t(Len)};

        if (m_pcre->Check("microsoft_exec_1", body) ||
            m_pcre->Check("microsoft_exec_2", body))
            m_cur.fmicrosoft_executable = true;

        // A charset declared inside the HTML body cancels the "no charset" flag.
        if (m_cur.fmime_html_no_charset && m_cur.fTextHtml &&
            m_pcre->Check("html_charset", body))
            m_cur.fmime_html_no_charset = false;

        if (prop->pcte && strstr(prop->pcte, "quoted-printable")) {
            if (m_pcre->Check("qp_long", body))
                m_cur.fmime_long_line_qp = true;
        }

        if (m_cur.fTextPlain)
            m_prengine->CheckQuoted({pBody, size_t(Len)});

        // Record both captured hosts when the tag URL and the visible URL text disagree.
        TStringBuf Pattern1, Pattern2;
        if (auto res = m_pcre->Check("a_href_mismatch", body)) {
            if ((Pattern1 = res->GetPattern(1)) && (Pattern2 = res->GetPattern(2))) {
                m_prengine->GetSpStat().AddStat(ST_HTTP) << "a_href_mismatch: " << Pattern1 << " vs " << Pattern2;
            }
        }
    }
    HandleArchive(prop, m_prengine);
}

//#ifndef SPK-3
void TSpBodyPart::HandleArchive(const TBodyPartProperty* prop, TRengine* m_prengine) {
    if (prop->pfilename) {
        bool exe_shingle_added = false;
        bool is_supported_arc = is_archive(prop->pfilename);
        //        bool is_unsupported_arc = is_unsupported_archive(prop->pfilename);
        if (is_supported_arc) // || is_unsupported_arc)
        {
            const size_t max_message_len = 1024;
            TString message;
            message.reserve(max_message_len);
            message += normalize_name(prop->pfilename);
            message += (is_supported_arc ? " + " : " - ");
            if (prop->arc_files) {
                m_prengine->CheckRange("fcnt_arch", (int)prop->arc_files->size(), false);
                bool has_exe = false;
                size_t longest_name_len = 0;
                message += IntToStroka(prop->arc_files->size());
                TString file_names;
                file_names.reserve(max_message_len);
                for (const auto& arc_file : *prop->arc_files) {
                    const TString& name = arc_file.name;
                    m_prengine->CheckField(FD_ARCH_ATTACH_FILE, name);
                    if (file_names.size() < max_message_len)
                        file_names += " " + normalize_name(name);
                    if (!is_image(name))
                        m_prengine->AddPattern(name, EN_SH_ATTACH_NAME);
                    longest_name_len = std::max(longest_name_len, strlen(GetFileNameComponent(name.c_str())));
                    if (is_executable(name)) {
                        m_prengine->AddPattern(name, EN_SH_ATTACH_EXEC);
                        if (!has_exe) {
                            message += " exe";
                            has_exe = true;
                            exe_shingle_added = true;
                        }
                    }
                }
                if (!has_exe)
                    message += " no_exe";
                message += file_names;
                if (longest_name_len)
                    m_prengine->CheckRange("arc_file_name_len", (int)longest_name_len, false);
            }
            m_prengine->GetSpStat().AddStat(ST_ARCHIVE) << SafeRecode(message);
        } else {
            if (!is_image(prop->pfilename)) {
                m_prengine->GetSpStat().AddStat(ST_ATTACH) << SafeRecode(prop->pfilename);
                m_prengine->AddPattern(prop->pfilename, EN_SH_ATTACH_NAME);
            }
        }
        if (!exe_shingle_added && is_executable(prop->pfilename))
            m_prengine->AddPattern(prop->pfilename, EN_SH_ATTACH_EXEC);
    }
}
//#else  //SPK-3
//void TSpBodyPart::HandleArchive(const TBodyPartProperty*) {
//}
//#endif //SPK-3
