#include "util/system/compat.h"
#include "mkshn.h"

#include <util/stream/printf.h>
#include <util/string/join.h>
#include <mail/so/spamstop/tools/so-clients/functional_clients/all_clients.h>
#include <mail/so/spamstop/tools/so-common/parsers.h>
#include <util/generic/hash.h>
#include <util/stream/file.h>

#include <mail/so/corp/agent_dialog.h>
#include <mail/so/spamstop/tools/so-common/ipv6.h>
#include <mail/so/corp/udnscontext.h>
#include <mail/so/spamstop/tools/so-clients/tshinglerenv.h>
#include <mail/so/spamstop/tools/so-common/sputil.h>
#include <mail/so/spamstop/tools/so-clients/functional_clients/CacheShClient.h>
#include <mail/so/spamstop/tools/so-common/ares.h>

#include <mail/so/spamstop/tools/so-clients/SperrorHolder.h>
#include <util/string/builder.h>
#include <util/string/strip.h>
#include <cstdlib>
#include <util/thread/singleton.h>

// Returns the domain part of an envelope sender such as "<user@example.com> ...".
// The domain starts right after the first '@' and runs up to the first '>' that
// follows it; when there is no '>', it runs up to the first space instead, and
// when neither is present it runs to the end of the input.
// Returns an empty view when the input contains no '@'.
TStringBuf extract_domain(const TStringBuf& sSender) {
    const auto atPos = sSender.find('@');
    if (atPos == TString::npos)
        return {};

    const auto first = atPos + 1;

    // Prefer the closing angle bracket as terminator, fall back to a space.
    auto last = sSender.find('>', first);
    if (last == TString::npos)
        last = sSender.find(' ', first);

    return last == TString::npos
               ? sSender.substr(first)
               : sSender.substr(first, last - first);
}


// Connect-stage fast check.
// Throws yexception when `connect` is empty. Otherwise sets iFastCode to ACCEPT
// when the mail is not outgoing, an IP is known and the intranet matcher accepts
// that IP; in every other case iFastCode is reset to Nothing().
void TFastChecker::CheckConnect(const TStringBuf& connect, const TStringBuf& qID, const TStringBuf& ip, bool outMail) {
    if (!connect)
        ythrow yexception() << "invalid connect " << connect;

    const bool intranetPeer = ip && !outMail && GlobalContext.intranet_matcher.match(TString{ip});

    if (!intranetPeer) {
        iFastCode = Nothing();
        return;
    }

    // fast accept by intranet
    pDaemonLogger->splog(TLOG_NOTICE, "Fast accept %sby IP %s", qID.data(), ip.data());
    iFastCode = EFastResolution::ACCEPT;
}

void TFastChecker::CheckMailFrom(const TStringBuf& mailfrom, const TStringBuf& qID, const TStringBuf& ip, const TStringBuf& from, bool outMail) {
    if (!ip)
        return;

    auto sMailfromDomain = TString{extract_domain(mailfrom)};
    //
    // fast accept by intranet with domain check
    //
    if (!outMail && GlobalContext.intranet_matcher.match(TString{ip}, sMailfromDomain)) {
        pDaemonLogger->splog(TLOG_NOTICE, "Fast accept %sby IP %s for '%s'", qID.data(), ip.data(), sMailfromDomain.c_str());
        iFastCode = EFastResolution::ACCEPT;
        return;
    }

    if (!outMail && from) // mailbody From, means spam limit check needed
    {
        TString sFrom(from);
        size_t delim = sFrom.find("<");
        if (delim != TString::npos)
            sFrom.erase(delim, 1);
        delim = sFrom.find_first_of("\n\r> ");
        if (delim != TString::npos)
            sFrom.erase(delim);

        ui64 sh64 = FnvHash<ui64>(sFrom);
        shList.push_back(TShingleStat(sh64, 14, 1)); // EN_SH_FROM_ADDR
        std::pair<ui64, int> key_pair(sh64, 14);
        m_shLabel[key_pair] = sFrom;
        pDaemonLogger->splog(TLOG_DEBUG, "%sNSHN 14 added for '%s'", qID.data(), sFrom.c_str());
    }
}

void TFastChecker::CheckRcptto(const TStringBuf& qID, const TStringBuf& ip, const TStringBuf& rcptUid, bool outMail) {
    if (outMail || !ip)
        return;

    TString sRcptIp;

    sRcptIp.assign(ip);
    sRcptIp.append("_");
    sRcptIp.append(rcptUid);

    ui64 sh64 = FnvHash<ui64>(sRcptIp);
    shList.push_back(TShingleStat(sh64, 17, 1)); // EN_SH_IP_RCP_TO
    std::pair<ui64, int> thePair(sh64, 17);
    m_shLabel[thePair] = sRcptIp;
    pDaemonLogger->splog(TLOG_DEBUG, "%sNSHN 17 added for '%s'", qID.data(), sRcptIp.data());
}

// Fast spam verdict based on the shingles queued in shList by the earlier stages.
//
// Asks GlobalContext.Pools->CountRequester for the counters of every queued
// shingle, derives per-type flags from today's spam/ham/malic counts and then
// sets iFastCode to OVERLIMIT, REJECT or SPAM (or leaves it untouched when no
// rule fires).
//
// The whole body is a function-try-block: any exception is written to `logger`
// together with a backtrace and is not propagated to the caller.
//
// mailfrom / rcptto - envelope addresses, used for the self-sent check and logging
// qID               - queue-id prefix inserted into every log line
// config            - supplies dumbMode and the fsSh14sLim / fsSh17sLim /
//                     fsSh17sLim2 / fsSh17mLim2 thresholds
void TFastChecker::DoShinglerCheck(const TLog& logger, const TStringBuf& mailfrom, const TStringBuf& rcptto, const TStringBuf& qID, const TSoConfig& config) try {
    //  fast spam check
    // Skipped entirely when there is no requester, when dumbMode >= 4,
    // or when no shingles were queued.
    if (!GlobalContext.Pools->CountRequester || (config.dumbMode >= 4) || shList.empty())
        return;

    bool overlimit = false;

    // presumably fills the counters of every entry in shList - confirm against CountRequester
    const bool http2_res = GlobalContext.Pools->CountRequester->GetNs(shList, logger);

    // Request failed: leave iFastCode as it is.
    if (!http2_res) {
        return;
    }

    bool bSameEmail = false;
    bool bSameDomain = false;
    bool sh14_full = false;        // a type-14 shingle exceeded one of its limits
    bool sh17_full = false;        // a type-17 shingle exceeded one of its limits
    bool sh17_works_alone = false; // type-17 evidence sufficient for SPAM on its own
    bool sh17_malic = false;       // type-17 malic count sufficient for REJECT
    // NOTE(review): sh14_cnt / sh17_cnt keep the maximum spam_today seen per type
    // but are never read afterwards in this function.
    int sh14_cnt = 0,
            sh17_cnt = 0;
    int ham_today;
    int spam_today;
    TString sShingles; // " <shingle>-<type>" tags of the shingles that tripped a limit
    char str_http[1024]; // scratch buffer, reused for the log line and for the tag

    // bSameEmail / bSameDomain are passed as out-parameters.
    CheckForSelfSentMessage(TString{mailfrom}, TString{rcptto}, &bSameEmail, &bSameDomain);

    for (auto& it_new : shList) {
        if (it_new.ShingleForm() == TShingleStat::TSHSHINGLE) {
            // "spam" here is today's spam plus today's malic count.
            spam_today = (ui32) it_new.Spam(TODAY) + (ui32) it_new.Malic(TODAY);
            ham_today = (ui32) it_new.Ham(TODAY);

            // Type 52 with any activity today means the sender is over the outgoing
            // limit, unless the message is sent to the same address or domain.
            // The scan stops at the first such shingle.
            if (it_new.Type() == 52 && (spam_today + ham_today)) // outgoing overlimit
            {
                if (!bSameEmail && !bSameDomain) {
                    overlimit = true;
                    break;
                }
            }

            // Type 14 (EN_SH_FROM_ADDR): full when spam is high in absolute terms
            // and the ham/spam ratio is low, or when spam exceeds fsSh14sLim.
            if (it_new.Type() == 14 && spam_today) {
                sh14_full |= (spam_today > 1000 && ((float) it_new.Ham(TODAY) / (float)spam_today) < 0.2);
                sh14_full |= (spam_today > 200 && ((float) it_new.Ham(TODAY) / (float)spam_today) < 0.01);
                sh14_full |= spam_today > config.fsSh14sLim;
                if (spam_today > sh14_cnt)
                    sh14_cnt = spam_today;
            }

            // Type 17 (EN_SH_IP_RCP_TO): same scheme with its own thresholds, plus
            // two stronger signals that do not need confirmation from type 14.
            if (it_new.Type() == 17 && spam_today) {
                sh17_full |= (spam_today > 1000 && ((float) it_new.Ham(TODAY) / (float)spam_today) < 0.2);
                sh17_full |= (spam_today > 600 && ((float) it_new.Ham(TODAY) / (float)spam_today) < 0.01);
                sh17_full |= spam_today > config.fsSh17sLim;
                if (spam_today > sh17_cnt)
                    sh17_cnt = spam_today;
                sh17_works_alone |= (spam_today > config.fsSh17sLim2) && (it_new.Ham(TODAY) < 2);
                sh17_malic |= (it_new.Malic(TODAY) > config.fsSh17mLim2) && (it_new.Ham(TODAY) < 2) && (it_new.Spam(TODAY) < 1);
            }

            // Log the counters of the current shingle and remember its tag.
            // The *_full flags are cumulative, so once a type has tripped, every
            // later shingle of that type is logged as well.
            if ((it_new.Type() == 14 && sh14_full) || (it_new.Type() == 17 && sh17_full)) {
                snprintf(str_http, sizeof(str_http), "t = %d, s = %d, h = %u, ts= %d th= %d m= %d ph= %d ps= %d %s %s",
                         it_new.Type(), (ui32) it_new.TotalSpam() + (ui32) it_new.TotalMalic(),
                         it_new.TotalHam(), // all, weight,
                         spam_today, it_new.Ham(TODAY), it_new.TotalMalic(),
                         (ui32) it_new.PersHam(TODAY) + (ui32) it_new.PersHam(YESTERDAY),
                         (ui32) it_new.PersSpam(TODAY) + (ui32) it_new.PersSpam(YESTERDAY),
                         it_new.ShingleStr().c_str(), m_shLabel[std::make_pair(it_new.Shingle(),
                                                                               it_new.Type())].c_str());
                pDaemonLogger->splog(TLOG_WARNING, "%sNSHN %s", qID.data(), str_http);
                snprintf(str_http, sizeof(str_http), " %s-%d", it_new.ShingleStr().c_str(),
                         it_new.Type());
                sShingles.append(str_http);
            }
        }
    }
    if (overlimit) {
        pDaemonLogger->splog(TLOG_WARNING, "%sFast reject, overlimit '%s'", qID.data(), mailfrom.data());
        iFastCode = EFastResolution::OVERLIMIT;
    }
    // NOTE(review): this chain is not an `else` of the overlimit branch, so flags
    // collected before the `break` can replace OVERLIMIT with REJECT or SPAM -
    // confirm this is intended.
    // Priority: malic REJECT, then type 17 alone, then type 14 and type 17 together.
    if (sh17_malic) {
        pDaemonLogger->splog(TLOG_WARNING, "Fast malic %sby%s", qID.data(), sShingles.c_str());
        iFastCode = EFastResolution::REJECT;
    } else if (sh17_works_alone) {
        pDaemonLogger->splog(TLOG_WARNING, "Fast spam %sby shingle 17%s", qID.data(), sShingles.c_str());
        iFastCode = EFastResolution::SPAM;
    } else if (sh14_full && sh17_full) {
        pDaemonLogger->splog(TLOG_WARNING, "Fast spam %sby%s", qID.data(), sShingles.c_str());
        iFastCode = EFastResolution::SPAM;
    }
} catch (...) {
    // Best effort: a failure of the fast check must not fail the request.
    logger << (TLOG_ERR) << "error in DoShinglerCheck: " << CurrentExceptionMessageWithBt();
}

// Builds the remote host description from a raw connect line of the form
// "<host> [<ip>] ... QID=<id> ... FRNR".
//
//  * FRNR is set when the line contains " FRNR" (case-insensitive).
//  * RemoteHost keeps the line up to (not including) the earliest of the
//    " QID=" / " FRNR" markers.
//  * RemoteIp receives the text between the first '[' and the next ']' of the
//    trimmed line, limited to 45 characters. It is left untouched when there is
//    no '[', no closing ']' or nothing between the brackets.
TRemoteHostData::TRemoteHostData(TString remoteHost) : RemoteHost(std::move(remoteHost)) {
    const char *posFRNR = strcasestr(RemoteHost.c_str(), " FRNR");
    const char *posQID = strcasestr(RemoteHost.c_str(), " QID=");
    if (posFRNR) {
        FRNR = true;
    } else {
        posFRNR = RemoteHost.cend();
    }
    if(!posQID)
        posQID = RemoteHost.cend();

    // Cut the tail starting at whichever marker comes first.
    RemoteHost.erase(std::min(posQID, posFRNR), RemoteHost.cend());

    constexpr const size_t max_ipv6_len = 45; //for hosts like 0000:0000:0000:0000:0000:ffff:187.250.251.113

    size_t begin = RemoteHost.find('[');
    if(TString::npos == begin)
        return;
    begin ++;

    const size_t end = RemoteHost.find(']', begin);
    // The result of the ']' search has to be validated here: with end == npos
    // the length below would wrap around and up to 45 characters of whatever
    // follows '[' would be taken as the address.
    if(TString::npos == end)
        return;
    const auto len = end - begin;

    if(len == 0)
        return;

    RemoteIp = RemoteHost.substr(begin, std::min(max_ipv6_len, len)); // IP address in symbolic form
}

// Builds the remote host description from a protobuf SoRequest.
// FRNR comes from request_meta; RemoteIp is the textual form of the packed
// remote_ip bytes (ParseIP throws THttpError when they are missing or
// malformed); RemoteHost is rendered as "<remote_host> [<ip>]".
TRemoteHostData::TRemoteHostData(const mail::so::api::v1::SoRequest& request) {
    FRNR = request.request_meta().frnr();

    const auto& connectInfo = request.smtp_envelope().connect_info();
    RemoteIp = ParseIP(connectInfo.remote_ip());
    RemoteHost = connectInfo.remote_host() + " [" + RemoteIp + ']';
}

// Converts a packed binary address into its textual form via inet_ntop.
// `remoteIp` must hold exactly 4 bytes (IPv4) or 16 bytes (IPv6).
// Throws THttpError(HTTP_BAD_REQUEST) when the value is empty, has any other
// length, or inet_ntop reports a failure.
TString TRemoteHostData::ParseIP(const TString &remoteIp) {
    const auto rawSize = remoteIp.size();

    if (rawSize == 0)
        throw THttpError(HTTP_BAD_REQUEST) << "remote_ip not set";

    if (rawSize == 4) {
        char text[INET_ADDRSTRLEN];
        if (!inet_ntop(AF_INET, remoteIp.Data(), text, INET_ADDRSTRLEN))
            throw THttpError(HTTP_BAD_REQUEST) << "can't parse ipv4: " << strerror(errno);
        return text;
    }

    if (rawSize == 16) {
        char text[INET6_ADDRSTRLEN];
        if (!inet_ntop(AF_INET6, remoteIp.Data(), text, INET6_ADDRSTRLEN))
            throw THttpError(HTTP_BAD_REQUEST) << "can't parse ipv6: " << strerror(errno);
        return text;
    }

    throw THttpError(HTTP_BAD_REQUEST) << "unexpected ip length: " << remoteIp.size();
}

// Parsers for the individual lines of the agent dialog (connect, helo,
// mail from, rcpt to).
//
// NOTE: the tag lookups use strcasestr() on TStringBuf::data(), so every view
// passed in must be backed by a NUL-terminated buffer. The scans that follow a
// successful lookup are additionally bounded by the end of the view.
namespace NAgentDialog {
    namespace NConnect {
        // Returns the text between the first '[' and the following ']' of the
        // connect line, limited to 45 characters.
        // Returns an empty view when there is no '[', no closing ']' or nothing
        // between the brackets.
        TStringBuf ExtractIp(const TStringBuf& connect) {
            const size_t max_ipv6_len = 45; //for hosts like 0000:0000:0000:0000:0000:ffff:187.250.251.113

            size_t begin = connect.find('[');

            if (TString::npos == begin)
                return nullptr;
            begin++;

            const size_t end = connect.find(']', begin);
            // Validate the ']' search itself: with end == npos the length below
            // would wrap around and arbitrary trailing text would be returned.
            if (TString::npos == end)
                return nullptr;
            const auto len = end - begin;

            if (len == 0)
                return nullptr;

            return connect.substr(begin, std::min(max_ipv6_len, len));
        }

        // Returns the value of the " qid=" tag (case-insensitive) wrapped in
        // angle brackets, e.g. "<abc123>". The value ends at the first
        // whitespace character or at the end of the view.
        // Returns an empty string when the tag is absent or the view is empty.
        TString ExtractQID(const TStringBuf& connect) {
            static const TStringBuf tag(" qid=");

            // A default-constructed view may carry a null data pointer.
            if (connect.empty())
                return {};

            const char* start = strcasestr(connect.data(), tag.data());
            if (!start)
                return {};

            start += tag.size();

            const char* end = start;

            // <cctype> predicates require a value representable as unsigned char.
            while (end < connect.cend() && !isspace(static_cast<unsigned char>(*end)))
                ++end;

            return TStringBuilder{} << '<' << TStringBuf(start, end) << '>';
        }
    } // namespace NConnect

    namespace NHelo {
        // Strips leading whitespace from the line. When the remainder has no
        // space it is returned unchanged; otherwise the result is "fakedhelo_"
        // followed by everything after the first space.
        TString Parse(TStringBuf connect) {
            connect = StripStringLeft(connect);
            const auto spacePos = connect.find(' ');

            if (spacePos == TString::npos)
                return TString{connect};

            return TStringBuilder{} << "fakedhelo_" << connect.substr(spacePos + 1);
        }
    } // namespace NHelo

    namespace NMailFrom {
        // Returns the address carried by the " frm=" tag (case-insensitive):
        // the text after the tag, with everything up to and including the first
        // '<' dropped when one is present, cut at the first of '\n', '\r', '>'
        // or ' '. Returns an empty view when the tag is absent or the view is empty.
        TStringBuf ExtractFrm(TStringBuf mailfrom) {
            static const TStringBuf tag(" frm=");

            // A default-constructed view may carry a null data pointer.
            if (mailfrom.empty())
                return nullptr;

            const char* sFrm = strcasestr(mailfrom.data(), tag.data());
            if (!sFrm)
                return nullptr;

            sFrm += tag.size();
            mailfrom = mailfrom.substr(std::distance(mailfrom.data(), sFrm));

            mailfrom = mailfrom.After('<');
            const auto delim = mailfrom.find_first_of("\n\r> ");
            if (delim != TString::npos)
                return mailfrom.substr(0, delim);
            return mailfrom;
        }

        // Returns the numeric value of the " SIZE=" tag (case-insensitive).
        // Yields 0 (not Nothing) when the tag is absent or the view is empty,
        // and Nothing when the tag is present but its digits cannot be parsed
        // as size_t (no digits, overflow).
        TMaybe<size_t> ExtractSize(TStringBuf mailfrom) {
            static const TStringBuf tag(" SIZE=");

            // A default-constructed view may carry a null data pointer.
            if (mailfrom.empty())
                return 0;

            const char* pos = strcasestr(mailfrom.data(), tag.data());
            if (!pos)
                return 0;
            pos += tag.size();

            const char* const limit = mailfrom.data() + mailfrom.size();
            auto end = pos;

            // Stay inside the view; cast because <cctype> predicates require a
            // value representable as unsigned char.
            while (end < limit && isdigit(static_cast<unsigned char>(*end)))
                ++end;

            if(size_t size{}; TryFromString(pos, std::distance(pos, end), size))
                return size;

            return Nothing();
        }
    } // namespace NMailFrom

    namespace NRcptto {
        // Returns the run of decimal digits that immediately follows the first
        // case-insensitive occurrence of `begin` in `src`.
        // The result is empty when the tag is absent, the view is empty, or no
        // digit follows the tag.
        TStringBuf Extract(TStringBuf src, const TString& begin) {
            // A default-constructed view may carry a null data pointer.
            if (src.empty())
                return nullptr;

            const char* delim = strcasestr(src.data(), begin.c_str());
            if (!delim) // tag not found
                return nullptr;

            const char* tokenStart = delim + begin.size();
            const char* const limit = src.data() + src.size();
            const char* tokenEnd = tokenStart;

            // Stay inside the view; cast because <cctype> predicates require a
            // value representable as unsigned char.
            while (tokenEnd < limit && isdigit(static_cast<unsigned char>(*tokenEnd)))
                tokenEnd++;
            return {tokenStart, tokenEnd};
        }

        // Digits following " id=" in the rcpt-to line.
        TStringBuf ExtractSuid(TStringBuf rcptto) {
            return Extract(rcptto, " id=");
        }

        // Digits following " uid=" in the rcpt-to line.
        TStringBuf ExtractUid(TStringBuf rcptto) {
            return Extract(rcptto, " uid=");
        }
    } // namespace NRcptto
} // namespace NAgentDialog


