#include "UrlReputationShClient.h"
#include <mail/so/spamstop/tools/so-clients/general_shingler_get.h>
#include <mail/so/spamstop/tools/so-clients/general_shingler_put.h>
#include <util/string/split.h>
#include <library/cpp/string_utils/url/url.h>
#include <mail/so/spamstop/tools/so-common/tkipv6.h>

namespace NFuncClient {

    /**
     * Groups the entries of a shingler answer by their "scheme" field.
     *
     * Each element of the top-level JSON array contributes the contents of its
     * "find" array to the bucket named by its "scheme" value. Entries sharing a
     * scheme are concatenated in the order they appear; a scheme whose "find"
     * array is empty still gets an (empty) bucket.
     */
    static auto ToMap(const NJson::TJsonValue& json) {
        THashMap<TString, NJson::TJsonValue::TArray > grouped;

        for (const auto& entry : json.GetArray()) {
            const auto& found = entry["find"].GetArray();

            // operator[] creates the bucket on first use of a scheme.
            auto& bucket = grouped[entry["scheme"].GetStringRobust()];
            bucket.insert(bucket.end(), found.begin(), found.end());
        }

        return grouped;
    }

    /**
     * Shingler GET request that collects reputation data for a set of URLs.
     *
     * The constructor queues the "today", "history", "current", "malware" and
     * "dns" messages; ParseAnswer() then writes the answers back through the
     * TUrlInfo pointers remembered at construction time, so the referenced
     * objects have to stay alive until the answer has been parsed.
     */
    class TGetUrlInfoRequest : public NGeneralShingler::TGetRequest {
    public:
        using TGetUrlUnfo = TVector<TUrlStatisticInfo>;

        /**
         * @param hosts entries whose host shingles are sent in the "today"/"history" messages
         * @param all   entries used for the "malware" and "dns" messages; asserted to be non-empty
         */
        TGetUrlInfoRequest(const TGetUrlUnfo& hosts, const TGetUrlUnfo& all) {
            Y_ASSERT(!all.empty());

            CreateDBRequest(hosts);
            CreateVirusPhishingRequest(all);
            CreateDNSRequest(all);
        }

        /// Splits the answer by scheme and applies every part to the remembered URL infos.
        void ParseAnswer(const NJson::TJsonValue& json) final {
            auto data = ToMap(json);
            const ui32 time = GetCurrent(data["current"]);
            FillUrlCounters(data["today"], time, true);
            FillUrlCounters(data["history"], time, false);
            FillMalwareFlags(data["malware"]);
            FillDNSInfo(data["dns"]);
        }

    private:
        /// Queues the counter lookups: one "shingle" item per entry with a non-zero host shingle.
        void CreateDBRequest(const TGetUrlUnfo& data) {
            NJson::TJsonValue::TArray array;
            for (const auto& [info, stats] : data) {
                const ui64 shingle = info->shingle_host;
                if(shingle) {
                    array.emplace_back()["shingle"] = shingle;
                    m_hosts[shingle].emplace_back(info);
                }
            }

            if (!array.empty()) {
                AddMessage("today", array);
                AddMessage("history", array);
            }
            // "current" is requested unconditionally, with an empty item list.
            AddMessage("current", NJson::TJsonValue::TArray());
        }

        /// Queues the "malware" lookup: one "value" item per hash produced by CreateUrlHashes().
        void CreateVirusPhishingRequest(const TGetUrlUnfo& data) {
            NJson::TJsonValue::TArray array;
            for (const auto& [info, stats] : data) {
                for (const auto& hash : CreateUrlHashes(info->url)) {
                    array.emplace_back()["value"] = hash;
                    m_urls[hash].emplace_back(info);
                }
            }

            if (!array.empty())
                AddMessage("malware", array);
        }

        /// Queues the "dns" lookup: one {"host", "alias"} item per alias of every entry.
        void CreateDNSRequest(const TGetUrlUnfo& data) {
            NJson::TJsonValue::TArray array;
            for (const auto& item : data) {
                const TString& host = item.first->host;
                NJson::TJsonValue value;
                value["host"] = host;

                for (const auto& alias : item.second->aliases) {
                    // `value` is reused: only "alias" changes between pushed copies.
                    value["alias"] = alias;
                    array.push_back(value);

                    // A later entry with the same (host, alias) pair replaces the earlier one.
                    m_aliasInfo[std::make_pair(host, alias)] = item;
                }
            }

            if (!array.empty())
                AddMessage("dns", array);
        }

        /**
         * Builds the list of hashes used to look a URL up in the "malware" scheme.
         *
         * If the URL contains '?', the complete URL is hashed first and the part
         * from '?' on is dropped. Then, for the host and for every suffix of it
         * obtained by removing leading labels (as long as the suffix still
         * contains a '.'), every path prefix is hashed, from the longest down to
         * the bare host. For "a.b.c/x/y" that is, in order:
         * "a.b.c/x/y", "a.b.c/x", "a.b.c", "b.c/x/y", "b.c/x", "b.c".
         * A host without any '.' contributes no hashes.
         */
        static TVector<ui64> CreateUrlHashes(TStringBuf url) {
            TVector<ui64> result;
            THash<TStringBuf> hasher;
            TStringBuf dummy, host;

            if (url.TrySplit('?', host, dummy)) {
                result.emplace_back(hasher(url));
                url = host;
            }

            host = url.Before('/');
            // Each successful split strips one leading label from `host`;
            // `url` is advanced in step at the bottom of the loop.
            while (host.TrySplit('.', dummy, host)) {
                TStringBuf path = url;
                do {
                    result.emplace_back(hasher(path));
                } while (path.TryRSplit('/', path, dummy));
                url = url.After('.');
            }

            return result;
        }

        /**
         * Extracts the timestamp from the "current" answer.
         *
         * Takes the first field of the first element (presumably the answer
         * carries a single field - TODO confirm against the server format).
         *
         * @return the field value truncated to ui32, or 0 when the answer is
         *         empty, its first element is not a JSON map, or that map has
         *         no fields.
         */
        static ui32 GetCurrent(const NJson::TJsonValue::TArray& data)
        try {
            if (data.empty())
                return 0;

            const auto& fields = data.front().GetMapSafe();
            // An empty map has no begin() element to dereference.
            if (fields.empty())
                return 0;

            return static_cast<ui32>(fields.begin()->second.GetUIntegerRobust());
        }
        catch (const yexception&) {
            return 0;
        }

        /**
         * Applies "today" or "history" counters to every URL info registered for
         * the item's "shingle".
         *
         * @param data  items of the corresponding answer
         * @param time  value stored as current_date (history pass only)
         * @param today true to fill TUrlInfo::today, false to fill TUrlInfo::history
         *              together with the last/first/current dates
         */
        void FillUrlCounters(const NJson::TJsonValue::TArray& data, ui32 time, bool today) {
            for (const auto& item : data) {
                if(TVector<TUrlInfo*>* infos = MapFindPtr(m_hosts, item["shingle"].GetUIntegerRobust())) {
                    for(TUrlInfo* urlInfo : *infos) {
                        TUrlCounters& counters = today ? urlInfo->today : urlInfo->history;
                        FillUrlCounters(item, counters);

                        if (!today) {
                            urlInfo->last_date = static_cast<ui32>(item["date"].GetUIntegerRobust());
                            urlInfo->first_date = static_cast<ui32>(item["create_date"].GetUIntegerRobust());
                            urlInfo->current_date = time;
                        }
                    }
                }
            }
        }

        /// Copies the ham/spam/complaint counters of one answer item into `counters`.
        void FillUrlCounters(const NJson::TJsonValue& data, TUrlCounters& counters) {
            counters.ham = static_cast<ui32>(data["ham"].GetUIntegerRobust());
            counters.spam = static_cast<ui32>(data["spam"].GetUIntegerRobust());
            counters.complaint_ham = static_cast<ui32>(data["complaint_ham"].GetUIntegerRobust());
            counters.complaint_spam = static_cast<ui32>(data["complaint_spam"].GetUIntegerRobust());
        }

        /**
         * Translates the "tags" of every "malware" answer item into flags on the
         * URL infos registered for the item's hash ("value").
         */
        void FillMalwareFlags(const NJson::TJsonValue::TArray& data) {
            for (const auto& item : data) {
                if(TVector<TUrlInfo*>* infos = MapFindPtr(m_urls, item["value"].GetUIntegerRobust())) {
                    // The tag list does not depend on the URL info, so fetch it once per item.
                    const auto& tags = item["tags"].GetArray();
                    for(TUrlInfo* urlInfo : *infos) {
                        for (const auto& tag : tags) {
                            const TString& value = tag.GetString();
                            if (value == "cbb77")
                                urlInfo->SetPhishingCBB();
                            else if (value.StartsWith("fraud.phishing"))
                                urlInfo->SetPhishingVDirect();
                            else if (value.StartsWith("malware"))
                                urlInfo->virus = true;
                        }
                    }
                }
            }
        }

        /// Returns GetMinorByte() of the first entry that TKIPv6 does not report as undefined, if any.
        static TMaybe<ui8> GetMinorByte(const NJson::TJsonValue::TArray& data) {
            for (const auto& item : data) {
                TKIPv6 ip(item.GetString());
                if (!ip.Undefined())
                    return ip.GetMinorByte();
            }
            return{};
        }

        /**
         * Stores the resolved byte of every "dns" answer item under its alias in
         * the matching URL info; the "surbl" alias additionally sets server_surbl.
         * Items for unknown (host, alias) pairs or without a usable address are skipped.
         */
        void FillDNSInfo(const NJson::TJsonValue::TArray& data) {
            for (const auto& item : data) {
                const auto key = std::make_pair(item["host"].GetString(), item["alias"].GetString());
                auto it = m_aliasInfo.find(key);
                if (it != m_aliasInfo.end()) {
                    const auto byte = GetMinorByte(item["resolved"].GetArray());

                    if (byte.Defined()) {
                        TUrlInfo* info = it->second.first;
                        info->aliases[key.second] = *byte;

                        if (key.second == "surbl") {
                            info->server_surbl = *byte;
                        }
                    }
                }
            }
        }

        // URL hash -> infos (m_urls) and host shingle -> infos (m_hosts); non-owning pointers.
        THashMap<ui64, TVector<TUrlInfo*>> m_urls, m_hosts;
        // (host, alias) -> entry that requested this DNS lookup.
        THashMap<std::pair<TString, TString>, TUrlStatisticInfo> m_aliasInfo;
    };

    class TResolveRequest : public NGeneralShingler::TGetRequest {
    public:
        using TData = THashMap<TString, TVector<TUrlStatistic*>>;

        TResolveRequest(const TData& data) : m_data(data) {
            NJson::TJsonValue::TArray array;
            for (const auto& item : m_data) {
                array.emplace_back()["url"] = item.first;
            }

            AddMessage("resolve", array);
        }

        void ParseAnswer(const NJson::TJsonValue& json) final {
            const auto values = std::move(ToMap(json)["resolve"]);
            for (const auto& item: values) {
                const TString& url = item["url"].GetString();
                for (auto& it : m_data[url]) {
                    it->AddLongUrl(CutUrl(item["resolved"].GetString()), CODES_ASCII);
                }
            }
        }

    private:
        TString getLogCode() const final {
            return "RESOLVE";
        }

        static TStringBuf CutUrl(TStringBuf value) {
            value.ChopSuffix("/");
            return CutWWWPrefix(CutSchemePrefix(value));
        }

        TData m_data;
    };

    /**
     * Fetches reputation data for the given URL statistics.
     *
     * First runs Resolve() (its result is not checked), then requests counters,
     * malware flags and DNS data for the entries returned by GetUrlInfo().
     *
     * @return true when there is nothing to request, otherwise the result of Getter.Perform().
     */
    bool TUrlReputation::Get(TUrlStatisticVector& data, const TLog& logger) const {
        Resolve(data, logger);

        auto infos = GetUrlInfo(data, false);
        const auto& hostInfos = infos.first;
        auto& allInfos = infos.second;

        // The second list is extended with the first one before it is used as the "all" set.
        allInfos.insert(allInfos.end(), hostInfos.begin(), hostInfos.end());

        if (!allInfos.empty()) {
            TGetUrlInfoRequest request(hostInfos, allInfos);
            return Getter.Perform(request, logger);
        }

        return true;
    }

    /// Sends a prepared put request through Updater and returns its result.
    bool TUrlReputation::Put(const TUrlPutRequest& request, const TLog& logger) const {
        const bool performed = Updater.Perform(request, logger);
        return performed;
    }

    /**
     * Sends a complaint for the entries GetUrlInfo() returns first.
     *
     * @param spam forwarded unchanged to TUrlComplaintRequest
     * @return true when there is nothing to report, otherwise the result of Updater.Perform().
     */
    bool TUrlReputation::Complaint(const TUrlStatisticVector& data, bool spam, const TLog& logger) const {
        auto hostInfos = GetUrlInfo(data, true).first;

        if (!hostInfos.empty()) {
            TUrlComplaintRequest request(hostInfos, spam);
            return Updater.Perform(request, logger);
        }

        return true;
    }

    /**
     * Requests resolution of every link in `data` whose checkUrl carries data.
     *
     * Entries are grouped by checkUrl.url, so each distinct URL is requested once.
     *
     * @return true when no entry qualifies, otherwise the result of Getter.Perform().
     */
    bool TUrlReputation::Resolve(TUrlStatisticVector& data, const TLog& logger) const {
        TResolveRequest::TData linksByUrl;

        for (auto& stat : data) {
            // Skip non-links and links without anything to check.
            if (!stat.isLink || stat.checkUrl.NoData())
                continue;

            linksByUrl[stat.checkUrl.url].push_back(&stat);
        }

        if (linksByUrl.empty())
            return true;

        TResolveRequest request(linksByUrl);
        return Getter.Perform(request, logger);
    }
} /* namespace NFuncClient */
