#include "turlstat.h"
#include <library/cpp/string_utils/url/url.h>
#include <util/string/cast.h>
#include <util/string/strip.h>
#include <library/cpp/string_utils/quote/quote.h>
#include <library/cpp/charset/wide.h>
#include <library/cpp/charset/codepage.h>
#include <library/cpp/unicode/punycode/punycode.h>

#include <mail/so/spamstop/tools/so-common/sputil.h>

// Bit flags that are OR-ed into TUrlInfo::phishing_mask by
// SetPhishingCBB() / SetPhishingVDirect() and tested by the IsPhishing*() accessors.
enum EPhishingUrl : ui8 {
    CBB = 1 << 1,     // 0x02
    VDIRECT = 1 << 2, // 0x04
};

// Reports whether this entry carries no URL, i.e. the `url` field is empty.
bool TUrlInfo::NoData() const {
    return url.empty();
}

// Returns `current - date`, clamped to 0 when `date` is later than `current`
// so that the unsigned subtraction can never wrap around.
static ui32 TimeElapsed(ui32 date, ui32 current) {
    if (current < date) {
        return 0;
    }
    return current - date;
}

// Difference between `current_date` and `first_date`;
// 0 when `first_date` is later than `current_date` (see TimeElapsed).
ui32 TUrlInfo::FirstTimeElapsed() const {
    return TimeElapsed(first_date, current_date);
}

// Difference between `current_date` and `last_date`;
// 0 when `last_date` is later than `current_date` (see TimeElapsed).
ui32 TUrlInfo::LastTimeElapsed() const {
    return TimeElapsed(last_date, current_date);
}

// Returns the marker "VIR2" when the `virus` flag is set, otherwise an empty string.
TString TUrlInfo::Virus() const {
    if (virus) {
        return "VIR2";
    }
    // Return an empty string explicitly: building a string from a null
    // `const char*` depends on a null-tolerant constructor and is undefined
    // behaviour for std::string-like types.
    return TString();
}

// Returns "PHISH" followed by the textual form of `phishing_mask` when any
// phishing flag is set, otherwise an empty string.
TString TUrlInfo::Phishing() const {
    if (!IsPhishing()) {
        // Return an empty string explicitly instead of relying on an implicit
        // nullptr -> TString conversion, which is undefined behaviour for
        // std::string-like types.
        return TString();
    }
    return "PHISH" + ToString(phishing_mask);
}

// Computes a 64-bit FNV hash of `value` after normalisation.
// Normalisation: StripString removes characters whose unsigned byte value is
// <= ' ' (space and control bytes), then the result is lower-cased.
// An empty input yields 0 without hashing.
static ui64 CalculateShingle(const TString& value) {
    if (!value.empty()) {
        // Compare as unsigned so bytes >= 0x80 are never treated as "<= ' '".
        auto isSpaceOrControl = [](const char* c) { return static_cast<ui8>(*c) <= ' '; };

        TString normalized = StripString(value, isSpaceOrControl);
        normalized.to_lower();
        return FnvHash<ui64>(normalized);
    }

    return 0;
}

// Recomputes `shingle_url` as the shingle (normalised hash) of the current `url`.
void TUrlInfo::CalcUrlShingle() {
    shingle_url = CalculateShingle(url);
}

// Recomputes `shingle_host` as the shingle (normalised hash) of the current `host`.
void TUrlInfo::CalcHostShingle() {
    shingle_host = CalculateShingle(host);
}

// Sets the VDIRECT bit in `phishing_mask`; other bits are left untouched.
void TUrlInfo::SetPhishingVDirect() {
    phishing_mask |= VDIRECT;
}

// Sets the CBB bit in `phishing_mask`; other bits are left untouched.
void TUrlInfo::SetPhishingCBB() {
    phishing_mask |= CBB;
}

// True when at least one bit of `phishing_mask` is set.
bool TUrlInfo::IsPhishing() const {
    return phishing_mask != 0;
}

// True when the CBB bit is set in `phishing_mask`.
bool TUrlInfo::IsPhishingCBB() const {
    return (phishing_mask & CBB) != 0;
}

// True when the VDIRECT bit is set in `phishing_mask`.
bool TUrlInfo::IsPhishingVDirect() const {
    return (phishing_mask & VDIRECT) != 0;
}

// Lower-cases `host`, whose bytes are interpreted in `charset`.
// Multi-byte charsets are converted to wide characters, lower-cased and
// converted back; single-byte charsets are lower-cased directly through
// their code page.
static TString ToLowerHost(const TString& host, ECharset charset) {
    if (!SingleByteCodepage(charset)) {
        return WideToChar(ToLowerRet(CharToWide(host, charset)), charset);
    }

    return ToLower(host, *CodePageByCharset(charset));
}

// Fills `info.host`, `info.url` and both shingles from the raw URL `_url`.
//
// Steps:
//   1. CGI-unescape the input, then cut the scheme and WWW prefixes.
//   2. Split into host and path.
//   3. Lower-case the host (as ASCII when the whole URL is ASCII, otherwise
//      in `charset`); a non-ASCII URL additionally has its host converted
//      to punycode.
//   4. Store host + CGI-escaped path as `info.url` and recompute shingles.
static void SetUrlInfo(TUrlInfo& info, const TStringBuf& _url, ECharset charset) {
    // Keep the decoded string alive while the prefix-cutting views refer to it.
    const TString decoded = CGIUnescapeRet(_url);
    auto url = ToString(CutWWWPrefix(CutSchemePrefix(decoded)));

    // Note: ASCII-ness is judged on the whole URL, not just the host part.
    const bool asciiOnly = IsStringASCII(url.begin(), url.end());

    TString host, path;
    SplitUrlToHostAndPath(url, host, path);

    if (asciiOnly) {
        info.host = ToLowerHost(host, CODES_ASCII);
    } else {
        info.host = ToLowerHost(host, charset);
        info.host = ForceHostNameToPunycode(CharToWide(info.host, charset));
    }

    info.url = info.host + CGIEscapeRet(path);

    info.CalcUrlShingle();
    info.CalcHostShingle();
}

// Builds statistics for `url`: stores the supplied flags and aliases, then
// normalises `url` (interpreted in `charset`) into `checkUrl` via SetUrlInfo.
// `aliases` is taken by value and moved into the member.
TUrlStatistic::TUrlStatistic(const TStringBuf& url, ECharset charset, bool link, bool spam, bool noaddstorage, TVector<TString> aliases, ui32 flags)
    : isLink(link)
    , isSpam(spam)
    , noAddStorage(noaddstorage)
    , flags(flags)
    // Inside the parentheses `aliases` names the constructor parameter, not the member.
    , aliases(std::move(aliases))
{
    SetUrlInfo(checkUrl, url, charset);
}

TUrlStatistic::TUrlStatistic(const TStringBuf& url, ECharset charset, bool link, bool spam)
    : TUrlStatistic(url, charset, link, spam, false, TVector<TString>(), 0)
{ }

// Normalises `url` (interpreted in `charset`) and stores the result in `longUrl`,
// overwriting its host, url and shingle fields.
void TUrlStatistic::AddLongUrl(const TStringBuf& url, ECharset charset) {
    SetUrlInfo(longUrl, url, charset);
}
