#include "html_sanitizer_misc.h"

#include <mail/so/libs/unperson/unperson.h>

#include <library/cpp/json/json_value.h>

#include <util/charset/wide.h>
#include <util/generic/vector.h>
#include <util/string/split.h>
#include <util/string/subst.h>

#include <regex>
#include <library/cpp/iterator/functools.h>
#include <util/string/strip.h>
#include <util/string/join.h>
#include <mail/so/spamstop/tools/text2shingles/lib/text2shingles.h>
#include <mail/so/spamstop/tools/so-common/parsers.h>
#include <library/cpp/json/writer/json.h>

namespace NHtmlSanMisc {

    // Returns a copy of `s` in which every line feed ('\n') is replaced by a single space.
    static TString SubstLineEnds(const TString &s) {
        const char lineFeed = '\n';
        const char blank = ' ';
        return SubstGlobalCopy(s, lineFeed, blank);
    }

    // Matches placeholder tokens of the form %<Kind>_<payload>%, where <Kind> is one of the names
    // listed in the alternation and <payload> is matched lazily (`.+?`) up to the next '%'.
    // Used by ReRemove() to delete such tokens from a text.
    static const std::regex unperson_re(
            "%(?:Uri|PhoneNumber|Password|LastName|FirstName|SecondName|MayBeName|Number|ShortNumber|ShortPassword|Timestamp|Date|Time)_.+?%");

    // Returns `text` with every match of `unperson_re` removed (replaced by the empty string).
    // The result buffer is pre-reserved to the input size, which is an upper bound for the output.
    static TString ReRemove(TStringBuf text) {
        TString stripped(Reserve(text.size()));
        auto sink = std::back_inserter(stripped);
        std::regex_replace(sink, text.cbegin(), text.cend(), unperson_re, "");
        return stripped;
    }

    // Constructs the value from `original` with line feeds replaced by spaces (see SubstLineEnds).
    TUnperson::TUnperson(const TOriginal &original)
        : TString(SubstLineEnds(original))
    {
    }

    // Replaces the stored text with `original`, line feeds replaced by spaces (see SubstLineEnds).
    TUnperson &TUnperson::operator=(const TOriginal &original) {
        TString flattened = SubstLineEnds(original);
        TString::operator=(std::move(flattened));
        return *this;
    }

    // Constructs the value from `unperson` with all `unperson_re` placeholder tokens removed (see ReRemove).
    TClipped::TClipped(const TUnperson &unperson)
        : TString(ReRemove(unperson))
    {
    }

    // Replaces the stored text with `unperson` minus all `unperson_re` placeholder tokens (see ReRemove).
    TClipped &TClipped::operator=(const TUnperson &unperson) {
        TString withoutPlaceholders = ReRemove(unperson);
        TString::operator=(std::move(withoutPlaceholders));
        return *this;
    }

    // Stores `original` as the Original text. Unperson and Clipped are not touched by this overload.
    TUnpersonSet &TUnpersonSet::operator=(TOriginal original) noexcept {
        Original = std::move(original);
        return *this;
    }

    // Stores `unperson` as the Unperson text and re-derives Clipped from it.
    // Original is not touched by this overload.
    // NOTE(review): the function is noexcept, but deriving Clipped goes through TClipped::operator=,
    // which runs a std::regex replace into a freshly allocated string; if that throws the process
    // terminates — confirm this is acceptable.
    TUnpersonSet &TUnpersonSet::operator=(TUnperson unperson) noexcept {
        Unperson = std::move(unperson);
        Clipped = Unperson;
        return *this;
    }

    // True when at least one of Original, Unperson or Clipped converts to true.
    TUnpersonSet::operator bool() const {
        if (Original) {
            return true;
        }
        if (Unperson) {
            return true;
        }
        return static_cast<bool>(Clipped);
    }

    // Extracts the personal-filter data from `val`.
    // Returns Nothing() unless "pfilter_last_type" is present, is a JSON string and that string
    // converts to true; otherwise returns a filter built from that string plus the
    // "pfilter_hams" / "pfilter_spams" counters (read with GetUIntegerRobust and narrowed to ui32).
    TMaybe<TPersonalFilter> TPersonalFilter::Parse(const NJson::TJsonValue &val) {
        const auto *lastType = val.GetValueByPath("pfilter_last_type");
        if (!lastType || !lastType->IsString() || !lastType->GetString()) {
            return Nothing();
        }

        const auto hams = static_cast<ui32>(val["pfilter_hams"].GetUIntegerRobust());
        const auto spams = static_cast<ui32>(val["pfilter_spams"].GetUIntegerRobust());
        return TPersonalFilter{lastType->GetString(), hams, spams};
    }

    // Writes LastType, Hams and Spams of `pf` to `stream` through LabeledOutput and returns the stream.
    // The local aliases are passed to LabeledOutput under the bare field names; LabeledOutput
    // presumably derives each label from the argument's spelling (util/stream/labeled.h), so
    // renaming an alias would change the output — confirm before touching the names.
    IOutputStream &operator<<(IOutputStream &stream, const TPersonalFilter &pf) {
        const auto &LastType = pf.LastType;
        const auto &Hams = pf.Hams;
        const auto &Spams = pf.Spams;
        return stream << LabeledOutput(LastType, Hams, Spams);
    }

    // Extracts the tab-filter data from `val`.
    // Returns Nothing() unless "tabpf_last_tab" is present, is a JSON string and that string
    // converts to true; otherwise returns a TTabPf holding that string.
    TMaybe<TTabPf> TTabPf::Parse(const NJson::TJsonValue &val) {
        const auto *lastTab = val.GetValueByPath("tabpf_last_tab");
        if (!lastTab || !lastTab->IsString() || !lastTab->GetString()) {
            return Nothing();
        }
        return TTabPf{lastTab->GetString()};
    }

    // Writes LastTab of `pf` to `stream` through LabeledOutput and returns the stream.
    // The alias keeps the bare field name as the LabeledOutput argument (presumably the label source —
    // confirm before renaming).
    IOutputStream &operator<<(IOutputStream &stream, const TTabPf &pf) {
        const auto &LastTab = pf.LastTab;
        return stream << LabeledOutput(LastTab);
    }

    // Builds a TAbookInfo from `val`.
    // Each of the six counters ("sent_count", "received_count", "received_read_count" and their
    // "domain_*" counterparts) is overwritten only when its key is present; the value is read with
    // GetUIntegerRobust(). TimesContacted mirrors SentCount, and SourceType becomes "ya_sent" when
    // TimesContacted is non-zero. The input JSON itself is moved into Original.
    TAbookInfo TAbookInfo::Parse(NJson::TJsonValue val) {
        TAbookInfo parsed;

        // Overwrites `target` only when `key` is found in `val`.
        const auto readCounter = [&val](TStringBuf key, auto &target) {
            if (auto node = val.GetValueByPath(key)) {
                target = node->GetUIntegerRobust();
            }
        };

        readCounter("sent_count", parsed.SentCount);
        readCounter("received_count", parsed.ReceivedCount);
        readCounter("received_read_count", parsed.ReceivedReadCount);
        readCounter("domain_sent_count", parsed.DomainSentCount);
        readCounter("domain_received_count", parsed.DomainReceivedCount);
        readCounter("domain_received_read_count", parsed.DomainReceivedReadCount);

        parsed.TimesContacted = parsed.SentCount;
        if (parsed.TimesContacted) {
            parsed.SourceType = "ya_sent";
        }

        parsed.Original = std::move(val);

        return parsed;
    }

    // Writes the address-book counters, TimesContacted and SourceType of `ab` to `stream` through
    // LabeledOutput and returns the stream.
    // The aliases keep the bare field names as LabeledOutput arguments (presumably the label source —
    // confirm before renaming).
    // NOTE(review): DomainSentCount, which TAbookInfo::Parse fills, is not part of the output —
    // confirm whether that omission is intentional.
    IOutputStream &operator<<(IOutputStream &stream, const TAbookInfo &ab) {

        const auto &SentCount = ab.SentCount;
        const auto &ReceivedCount = ab.ReceivedCount;
        const auto &ReceivedReadCount = ab.ReceivedReadCount;
        const auto &DomainReceivedCount = ab.DomainReceivedCount;
        const auto &DomainReceivedReadCount = ab.DomainReceivedReadCount;
        const auto &TimesContacted = ab.TimesContacted;
        const auto &SourceType = ab.SourceType;

        return stream << LabeledOutput(SentCount, ReceivedCount, ReceivedReadCount, DomainReceivedCount,
                                       DomainReceivedReadCount, TimesContacted, SourceType);
    }

    // Parses one sender entry: an object with "request" and "response" members.
    //
    // From "request": "uid" and "suid" are read with GetStringRobust(); "email" and "domain" are
    // used only when they are JSON strings, otherwise the login / domain stay empty.
    // From "response": the personal filter, tab filter and address-book info are produced by their
    // own Parse functions, "user_ml_features" / "user_ml_embeddings" are copied as raw JSON, and the
    // optional "in_reply_to_matches" / "references_matches" flags default to false when absent
    // (when present they are read with GetBooleanSafe()).
    //
    // `val` is taken by value and moved into the first initializer of the returned TSender.
    // NOTE(review): `val` is non-const, so val["request"] / val["response"] use the non-const
    // operator[], which presumably creates missing keys — confirm this side effect on the stored
    // JSON is intended.
    TSender TSender::Parse(NJson::TJsonValue val) {
        const auto &request = val["request"];
        TUid uid = TUid{request["uid"].GetStringRobust()};
        TSuid suid = TSuid{request["suid"].GetStringRobust()};

        // For some requests we can't extract email.
        // Bind by reference: only the string payload is needed, not a deep copy of the JSON node.
        const NJson::TJsonValue &emailValue = request["email"];
        TString emailString;
        if (emailValue.IsString()) {
            emailString = emailValue.GetString();
        }
        TLogin login = TLogin{emailString};

        // And domain, too
        const NJson::TJsonValue &domainValue = request["domain"];
        TString domainString;
        if (domainValue.IsString()) {
            domainString = domainValue.GetString();
        }
        TDomain domain = TDomain{domainString};

        const auto &response = val["response"];

        auto personalFilter = TPersonalFilter::Parse(response);
        auto tabPf = TTabPf::Parse(response);
        auto abookInfo = TAbookInfo::Parse(response);

        // These are returned by value, so a real copy is required here.
        NJson::TJsonValue rawUserMlFeatures = response["user_ml_features"];
        NJson::TJsonValue rawUserMlEmbeddings = response["user_ml_embeddings"];

        bool validInReplyTo = false;
        if (const NJson::TJsonValue *flag = response.GetValueByPath("in_reply_to_matches")) {
            validInReplyTo = flag->GetBooleanSafe();
        }

        bool validReferences = false;
        if (const NJson::TJsonValue *flag = response.GetValueByPath("references_matches")) {
            validReferences = flag->GetBooleanSafe();
        }

        return {
                std::move(val),
                std::move(uid),
                std::move(suid),
                std::move(login),
                std::move(domain),
                std::move(personalFilter),
                std::move(tabPf),
                std::move(abookInfo),
                std::move(rawUserMlFeatures),
                std::move(rawUserMlEmbeddings),
                validInReplyTo,
                validReferences
        };
    }

    // Writes the identifying fields, filters, address-book info and the two validity flags of
    // `sender` to `stream` through LabeledOutput and returns the stream.
    // The aliases keep the bare field names as LabeledOutput arguments (presumably the label source —
    // confirm before renaming). The order of the LabeledOutput arguments, not of the alias
    // declarations, determines the output order.
    IOutputStream &operator<<(IOutputStream &stream, const TSender &sender) {
        const auto &Uid = sender.Uid;
        const auto &Suid = sender.Suid;
        const auto &Login = sender.Login;
        const auto &PersonalFilter = sender.PersonalFilter;
        const auto &TabPf = sender.TabPf;
        const auto &Domain = sender.Domain;
        const auto &AbookInfo = sender.AbookInfo;
        const auto &ValidInReplyTo = sender.ValidInReplyTo;
        const auto &ValidReferences = sender.ValidReferences;

        return stream << LabeledOutput(
            Uid,
            Suid,
            Login,
            PersonalFilter,
            TabPf,
            Domain,
            AbookInfo,
            ValidInReplyTo,
            ValidReferences);
    }

    // Builds a TAddress from one JSON object. Every field is optional and is overwritten only when
    // its key is present:
    //   "address"   -> Address  (GetStringSafe)
    //   "born-date" -> BornDate (milliseconds, GetUIntegerRobust)
    //   "default"   -> Default  (GetBooleanRobust)
    //   "native", "rpop", "unsafe", "validated" -> same-named flags (GetBooleanSafe)
    TAddress TAddress::Parse(const NJson::TJsonValue &jsonValue) {
        const auto lookup = [&jsonValue](TStringBuf key) -> const NJson::TJsonValue * {
            return jsonValue.GetValueByPath(key);
        };

        TAddress parsed;

        if (const auto *node = lookup("address")) {
            parsed.Address = node->GetStringSafe();
        }
        if (const auto *node = lookup("born-date")) {
            parsed.BornDate = TInstant::MilliSeconds(node->GetUIntegerRobust());
        }
        if (const auto *node = lookup("default")) {
            parsed.Default = node->GetBooleanRobust();
        }
        if (const auto *node = lookup("native")) {
            parsed.Native = node->GetBooleanSafe();
        }
        if (const auto *node = lookup("rpop")) {
            parsed.Rpop = node->GetBooleanSafe();
        }
        if (const auto *node = lookup("unsafe")) {
            parsed.Unsafe = node->GetBooleanSafe();
        }
        if (const auto *node = lookup("validated")) {
            parsed.Validated = node->GetBooleanSafe();
        }

        return parsed;
    }

    // Writes BornDate and the five boolean flags of `address` to `stream` through LabeledOutput and
    // returns the stream. The Address string itself is not printed.
    // The aliases keep the bare field names as LabeledOutput arguments (presumably the label source —
    // confirm before renaming).
    IOutputStream &operator<<(IOutputStream &stream, const TAddress &address) {
        const auto &BornDate = address.BornDate;
        const auto &Default = address.Default;
        const auto &Native = address.Native;
        const auto &Rpop = address.Rpop;
        const auto &Unsafe = address.Unsafe;
        const auto &Validated = address.Validated;
        return stream << LabeledOutput(BornDate, Default, Native, Rpop, Unsafe, Validated);
    }

    // Builds a TUserInfo for `email` from a user-info JSON object.
    // `email` is stored as Email; every other field is optional and is overwritten only when its
    // key (or path) is present in `jsonValue`. Paths under "dbfields" are resolved with '/' as the
    // delimiter because the leaf keys themselves contain dots.
    TUserInfo TUserInfo::Parse(TLogin email, const NJson::TJsonValue &jsonValue) {
        TUserInfo parsed;

        parsed.Email = std::move(email);

        if (const auto *node = jsonValue.GetValueByPath("address-list")) {
            const NJson::TJsonValue::TArray &rawAddresses = node->GetArraySafe();
            parsed.AddressList.reserve(rawAddresses.size());
            for (const auto &rawAddress : rawAddresses) {
                parsed.AddressList.push_back(TAddress::Parse(rawAddress));
            }
        }

        if (const auto *node = jsonValue.GetValueByPath("aliases.pddaliases")) {
            for (const auto &rawAlias : node->GetArraySafe()) {
                parsed.PddAliases.emplace_back(rawAlias.GetStringSafe());
            }
        }

        if (const auto *node = jsonValue.GetValueByPath("attributes.account_is_maillist")) {
            parsed.IsMailList = node->GetBooleanRobust();
        }

        if (const auto *node = jsonValue.GetValueByPath("attributes.phone_confirmation_timestamp")) {
            // The timestamp is parsed as a base-10 number of seconds from its string form.
            const ui64 seconds = IntFromString<ui64, 10>(node->GetStringRobust());
            parsed.PhoneConfirmationDate = TInstant::Seconds(seconds);
        }

        if (const auto *node = jsonValue.GetValueByPath("dbfields/subscription.suid.2", '/')) {
            parsed.Suid = TSuid{node->GetStringRobust()};
        }

        if (const auto *node = jsonValue.GetValueByPath("dbfields/userinfo.country.uid", '/')) {
            parsed.Country = node->GetStringSafe();
        }

        if (const auto *node = jsonValue.GetValueByPath("dbfields/userinfo.reg_date.uid", '/')) {
            parsed.RegDate = TInstant::ParseIso8601(node->GetStringSafe());
        }

        if (const auto *node = jsonValue.GetValueByPath("hosted")) {
            parsed.Hosted = node->GetBooleanRobust();
        }

        if (const auto *node = jsonValue.GetValueByPath("lite")) {
            parsed.Lite = node->GetBooleanRobust();
        }

        if (const auto *node = jsonValue.GetValueByPath("login")) {
            parsed.Login = TLogin{node->GetStringSafe()};
        }

        if (const auto *node = jsonValue.GetValueByPath("uid")) {
            parsed.Uid = TUid{node->GetStringRobust()};
        }

        if (const auto *node = jsonValue.GetValueByPath("karma")) {
            parsed.Karma = node->GetIntegerRobust();
        }

        if (const auto *node = jsonValue.GetValueByPath("karma-status")) {
            parsed.KarmaStatus = node->GetIntegerRobust();
        }

        return parsed;
    }

    // Returns this object viewed as a TLegacy, so that streaming it selects the TLegacy overload of
    // operator<< (the compact "key=value" line) instead of the TUserInfo one.
    // NOTE(review): the relationship between TUserInfo and TLegacy is declared in the header;
    // whether this static_cast is an upcast or a downcast cannot be told from here — verify before
    // changing it.
    const TLegacy &TUserInfo::AsLegacy() const {
        return static_cast<const TLegacy &>(*this);
    }

    // Writes `legacy` as one space-separated line: the e-mail first, then optional key=value pairs.
    // Sample of the format (field order in real output follows the code below):
    //   donotreply@wchat.live id=1130000043987042 country=ru karma=0 karma_status=0 uid=1130000026784471 borndate=1518310255
    // "id" is the Suid; karma fields are printed only together with a non-empty Uid; "borndate" is
    // RegDate in seconds; "tel=1" is appended when PhoneConfirmationDate is set.
    IOutputStream &operator<<(IOutputStream &stream, const TLegacy &legacy) {
        stream << legacy.Email;

        if (legacy.Suid) {
            stream << " id=" << legacy.Suid;
        }

        if (legacy.Uid) {
            stream << " uid=" << legacy.Uid
                   << " karma=" << legacy.Karma
                   << " karma_status=" << legacy.KarmaStatus;
        }

        if (legacy.Country) {
            stream << " country=" << legacy.Country;
        }

        if (legacy.RegDate) {
            stream << " borndate=" << legacy.RegDate.Seconds();
        }

        if (legacy.PhoneConfirmationDate) {
            stream << " tel=1";
        }

        return stream;
    }

    // Writes `userInfo` to `stream` through LabeledOutput and returns the stream.
    // When Uid is empty only Email is printed; otherwise all fields are printed, with AddressList
    // and PddAliases rendered as comma-joined ranges.
    // The aliases keep the bare field names as LabeledOutput arguments (presumably the label source —
    // confirm before renaming).
    IOutputStream &operator<<(IOutputStream &stream, const TUserInfo &userInfo) {
        const auto &Email = userInfo.Email;
        const auto &Uid = userInfo.Uid;
        if (!Uid) {
            return stream << LabeledOutput(Email);
        }

        // The joiners are temporaries; binding them to const references keeps them alive until the end of scope.
        const auto &AddressList = MakeRangeJoiner(",", userInfo.AddressList);
        const auto &PddAliases = MakeRangeJoiner(",", userInfo.PddAliases);
        const auto &IsMailList = userInfo.IsMailList;
        const auto &Suid = userInfo.Suid;
        const auto &Country = userInfo.Country;
        const auto &RegDate = userInfo.RegDate;
        const auto &Hosted = userInfo.Hosted;
        const auto &Lite = userInfo.Lite;
        const auto &Login = userInfo.Login;
        const auto &PhoneConfirmationDate = userInfo.PhoneConfirmationDate;
        const auto Karma = userInfo.Karma;
        const auto KarmaStatus = userInfo.KarmaStatus;

        return stream << LabeledOutput(Email, AddressList, PddAliases, IsMailList, Suid, Country, RegDate, Hosted, Lite,
                                       Login, Uid, PhoneConfirmationDate, Karma, KarmaStatus);
    }

    // Builds a TDoc from one element of the "docs" array.
    //
    // `jsonValue` must be a JSON map (GetMapSafe()). All keys are optional; a field is overwritten
    // only when its key is present.
    //
    // For each of PureBody / Subject / BodyText the original text comes from the plain key
    // ("pure_body", "hdr_subject", "body_text"); the unperson text comes from the matching
    // "unperson_*" key when present, otherwise it is derived from the original text just stored
    // (which may be empty). Subject strings are stripped before use.
    //
    // hasAttachsize records only the presence of "attachsize_b". "x_urls" is split on '\n' with
    // empty pieces skipped. "gateway_received_date" is interpreted as seconds.
    // A full copy of `jsonValue` is kept in Source.
    TDoc TDoc::Parse(const NJson::TJsonValue &jsonValue) {
        const auto &map = jsonValue.GetMapSafe();

        TDoc doc;

        doc.hasAttachsize = MapFindPtr(map, "attachsize_b") != nullptr;

        if (const auto *val = MapFindPtr(map, "pure_body")) {
            doc.PureBody = TOriginal(val->GetStringSafe());
        }

        if (const auto *val = MapFindPtr(map, "unperson_pure_body")) {
            doc.PureBody = TUnperson(TOriginal(val->GetStringSafe()));
        } else {
            doc.PureBody = TUnperson(doc.PureBody.Original);
        }

        if (const auto *val = MapFindPtr(map, "hdr_subject")) {
            doc.Subject = TOriginal(StripString(val->GetStringSafe()));
        }

        if (const auto *val = MapFindPtr(map, "unperson_subject")) {
            doc.Subject = TUnperson(TOriginal(StripString(val->GetStringSafe())));
        } else {
            doc.Subject = TUnperson(doc.Subject.Original);
        }

        if (const auto *val = MapFindPtr(map, "body_text")) {
            doc.BodyText = TOriginal(val->GetStringSafe());
        }

        if (const auto *val = MapFindPtr(map, "unperson_body_text")) {
            doc.BodyText = TUnperson(TOriginal(val->GetStringSafe()));
        } else {
            doc.BodyText = TUnperson(doc.BodyText.Original);
        }

        if (const auto *val = MapFindPtr(map, "html_body")) {
            doc.htmlBody = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "hdr_from_normalized")) {
            doc.normalizedFrom = StripString(val->GetStringSafe());
        }

        if (const auto *val = MapFindPtr(map, "hdr_from_display_name")) {
            doc.displayName = StripString(val->GetStringSafe());
        }

        if (const auto *val = MapFindPtr(map, "content_type")) {
            doc.contentType = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "attachname")) {
            doc.attachName = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "md5")) {
            doc.md5 = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "x_urls")) {
            StringSplitter(val->GetStringSafe()).Split('\n').SkipEmpty().Collect(&doc.urls);
        }

        if (const auto *val = MapFindPtr(map, "hid")) {
            doc.hid = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "ugc_id")) {
            doc.UgcId = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "deobfuscated_ugc")) {
            doc.Ugc = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "pure_body_wmd_distance")) {
            doc.pureBodyWmdDistance = val->GetDoubleRobust();
        }

        if (const auto *val = MapFindPtr(map, "pure_body_wmd_neighbour_id")) {
            doc.pureBodyWmdNeighbourId = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "pure_body_wmd_neighbour_labels")) {
            doc.pureBodyWmdNeighbourLabels = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "subject_wmd_distance")) {
            doc.subjectWmdDistance = val->GetDoubleRobust();
        }

        if (const auto *val = MapFindPtr(map, "subject_wmd_neighbour_id")) {
            doc.subjectWmdNeighbourId = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "subject_wmd_neighbour_labels")) {
            doc.subjectWmdNeighbourLabels = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "attach_name_wmd_distance")) {
            doc.attachNameWmdDistance = val->GetDoubleRobust();
        }

        if (const auto *val = MapFindPtr(map, "attach_name_wmd_neighbour_id")) {
            doc.attachNameWmdNeighbourId = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "attach_name_wmd_neighbour_labels")) {
            doc.attachNameWmdNeighbourLabels = val->GetStringSafe();
        }

        if (const auto *val = MapFindPtr(map, "gateway_received_date")) {
            doc.gatewayReceivedDate = TInstant::Seconds(val->GetUIntegerRobust());
        }

        // `jsonValue` is a const reference, so this is (and always was) a copy; the former
        // std::move() here could not move anything and only obscured that.
        doc.Source = jsonValue;

        return doc;
    }

    // Stores every argument in the same-named member: all are moved in, except the plain bool
    // allFromSameOrgId, which is copied. The visiblePart cache used by FindVisiblePart() is not
    // initialized here and keeps its default state.
    // NOTE(review): queueId is the first parameter but is initialized in the middle of the list —
    // presumably the list follows the member declaration order in the header; confirm before reordering.
    TAnswer::TAnswer(TString queueId,
                     TVector<TDoc> docs,
                     TVector<TSender> senders,
                     TString hbfProjectId,
                     THashMap<TLogin, TUserInfo> userInfos,
                     TMaybe<TVector<float>> mailDssmEmbedding,
                     TVector<TString> dkimDomains,
                     TMaybe<NJson::TJsonValue> dkimStats,
                     TMaybe<TMatchedTemplate> matchedTemplate,
                     TMaybe<float> dssmDistance,
                     TString dssmNeighbourId,
                     TString dssmNeighbourLabels,
                     TMaybe<float> dssm2Distance,
                     TString dssm2NeighbourId,
                     TString dssm2NeighbourLabels,
                     TMaybe<float> dssmSubjectDistance,
                     TString dssmSubjectNeighbourId,
                     TString dssmSubjectNeighbourLabels,
                     TMaybe<bool> hasCryptaUserVector,
                     TLogin mailFromLogin,
                     TMaybe<TUserInfo> mailFromUserinfo,
                     TMaybe<NJson::TJsonValue> mailFromJsonUserinfo,
                     bool allFromSameOrgId,
                     TMaybe<NJson::TJsonValue> recipientsUserinfos,
                     TMaybe<NJson::TJsonValue> activityInfo,
                     NJson::TJsonValue senderMlFeatures,
                     NJson::TJsonValue senderMlEmbeddings,
                     TVector<TString> yaDiskInfos,
                     NJson::TJsonValue source) noexcept
            : docs(std::move(docs))
            , Senders(std::move(senders))
            , HbfProjectId(std::move(hbfProjectId))
            , UserInfos(std::move(userInfos))
            , MailDssmEmbedding(std::move(mailDssmEmbedding))
            , DkimDomains(std::move(dkimDomains))
            , DkimStats(std::move(dkimStats))
            , MatchedTemplate(std::move(matchedTemplate))
            , DssmDistance(std::move(dssmDistance))
            , DssmNeighbourId(std::move(dssmNeighbourId))
            , DssmNeighbourLabels(std::move(dssmNeighbourLabels))
            , Dssm2Distance(std::move(dssm2Distance))
            , Dssm2NeighbourId(std::move(dssm2NeighbourId))
            , Dssm2NeighbourLabels(std::move(dssm2NeighbourLabels))
            , DssmSubjectDistance(std::move(dssmSubjectDistance))
            , DssmSubjectNeighbourId(std::move(dssmSubjectNeighbourId))
            , DssmSubjectNeighbourLabels(std::move(dssmSubjectNeighbourLabels))
            , HasCryptaUserVector(std::move(hasCryptaUserVector))
            , QueueId(std::move(queueId))
            , MailFromLogin(std::move(mailFromLogin))
            , MailFromUserinfo(std::move(mailFromUserinfo))
            , MailFromJsonUserinfo(std::move(mailFromJsonUserinfo))
            , AllFromSameOrgId(allFromSameOrgId)
            , RecipientsUserinfos(std::move(recipientsUserinfos))
            , ActivityInfo(std::move(activityInfo))
            , SenderMlFeatures(std::move(senderMlFeatures))
            , SenderMlEmbeddings(std::move(senderMlEmbeddings))
            , YaDiskInfos(std::move(yaDiskInfos))
            , Source(std::move(source))
    {
    }

    // Builds a TAnswer from the top-level response JSON.
    //
    // Every key is optional; a missing key leaves the corresponding value empty / false / undefined.
    // Highlights:
    //   "docs", "senders"       -> parsed element-wise with TDoc::Parse / TSender::Parse
    //   "rcpttos_userinfos"     -> kept as raw JSON and also parsed into a map keyed by the
    //                              FixAddress()-normalized e-mail
    //   "mailfrom_login"        -> normalized with FixAddress(); also used as the e-mail for
    //                              "mailfrom_userinfo" when that key is present
    //   "yadisk_info"           -> only string elements are kept
    //   "sender_ml_features", "sender_ml_embeddings" -> copied as raw JSON
    // A full copy of `jsonValue` is passed on as the answer's source.
    TAnswer TAnswer::Parse(const NJson::TJsonValue &jsonValue) {

        TVector<TDoc> docs;
        if (const auto *it = jsonValue.GetValueByPath("docs")) {
            auto &arr = it->GetArraySafe();
            docs.reserve(arr.size());
            for(const NJson::TJsonValue& value : arr) {
                docs.emplace_back(TDoc::Parse(value));
            }
        }

        TVector<TSender> senders;
        if (const auto *it = jsonValue.GetValueByPath("senders")) {
            const auto &arr = it->GetArraySafe();
            senders.reserve(arr.size());
            std::transform(arr.cbegin(), arr.cend(), std::back_inserter(senders), TSender::Parse);
        }

        TString hbfProjectId;
        if (const auto *it = jsonValue.GetValueByPath("hbf_project_id"); it && it->IsString()) {
            hbfProjectId = it->GetString();
        }

        THashMap<TLogin, TUserInfo> userInfos;
        TMaybe<NJson::TJsonValue> recipientsUserinfos;
        if (const auto *it = jsonValue.GetValueByPath("rcpttos_userinfos")) {
            recipientsUserinfos = *it;
            const NJson::TJsonValue::TMapType &rawUserInfos = it->GetMapSafe();
            for (const auto&[email, rawUserInfo]: rawUserInfos) {
                TLogin login{FixAddress(email)};
                userInfos.emplace(login, TUserInfo::Parse(login, rawUserInfo));
            }
        }

        TMaybe<TVector<float>> mailDssmEmbedding;
        if (const auto *it = jsonValue.GetValueByPath("mail_dssm_embedding")) {
            const NJson::TJsonValue::TArray& array = it->GetArraySafe();
            mailDssmEmbedding.ConstructInPlace(Reserve(array.size()));
            for(const NJson::TJsonValue& value : array) {
                mailDssmEmbedding->emplace_back(value.GetDoubleRobust());
            }
        }

        TVector<TString> dkimDomains;
        if (const auto* it = jsonValue.GetValueByPath("dkim_domains")) {
            const NJson::TJsonValue::TArray& array = it->GetArraySafe();

            dkimDomains.reserve(array.size());
            std::transform(array.cbegin(), array.cend(), std::back_inserter(dkimDomains), [](const auto& v) {
                return v.GetStringSafe();
            });
        }

        TMaybe<NJson::TJsonValue> dkimStats;
        if (const auto* it = jsonValue.GetValueByPath("dkim_stats")) {
            dkimStats = *it;
        }

        TMaybe<TMatchedTemplate> matchedTemplate;
        if (const auto *it = jsonValue.GetValueByPath("matched_template")) {
            matchedTemplate.ConstructInPlace(TMatchedTemplate::Parse(*it));
        }

        TMaybe<float> dssmDistance;
        if (const auto *val = jsonValue.GetValueByPath("dssm_distance")) {
            dssmDistance = val->GetDoubleRobust();
        }

        TString dssmNeighbourId;
        if (const auto *val = jsonValue.GetValueByPath("dssm_neighbour_id")) {
            dssmNeighbourId = val->GetStringSafe();
        }

        TString dssmNeighbourLabels;
        if (const auto *val = jsonValue.GetValueByPath("dssm_neighbour_labels")) {
            dssmNeighbourLabels = val->GetStringSafe();
        }

        TMaybe<float> dssm2Distance;
        if (const auto *val = jsonValue.GetValueByPath("dssm2_distance")) {
            dssm2Distance = val->GetDoubleRobust();
        }

        TString dssm2NeighbourId;
        if (const auto *val = jsonValue.GetValueByPath("dssm2_neighbour_id")) {
            dssm2NeighbourId = val->GetStringSafe();
        }

        TString dssm2NeighbourLabels;
        if (const auto *val = jsonValue.GetValueByPath("dssm2_neighbour_labels")) {
            dssm2NeighbourLabels = val->GetStringSafe();
        }

        TMaybe<float> dssmSubjectDistance;
        if (const auto *val = jsonValue.GetValueByPath("dssm_subject_distance")) {
            dssmSubjectDistance = val->GetDoubleRobust();
        }

        TString dssmSubjectNeighbourId;
        if (const auto *val = jsonValue.GetValueByPath("dssm_subject_neighbour_id")) {
            dssmSubjectNeighbourId = val->GetStringSafe();
        }

        TString dssmSubjectNeighbourLabels;
        if (const auto *val = jsonValue.GetValueByPath("dssm_subject_neighbour_labels")) {
            dssmSubjectNeighbourLabels = val->GetStringSafe();
        }

        TMaybe<bool> hasCryptaUserVector;
        if (const auto* val = jsonValue.GetValueByPath("has_crypta_user_vector")) {
            hasCryptaUserVector = val->GetBooleanSafe();
        }

        TString queueId;
        if (const auto* val = jsonValue.GetValueByPath("queue_id")) {
            queueId = val->GetStringRobust();
        }

        TLogin mailFrom;
        if (const auto* val = jsonValue.GetValueByPath("mailfrom_login")) {
            mailFrom = TLogin{FixAddress(val->GetStringSafe())};
        }

        bool allFromSameOrgId = false;
        if (const auto* val = jsonValue.GetValueByPath("all_from_same_org_id")) {
            allFromSameOrgId = val->GetBooleanSafe();
        }

        TMaybe<TUserInfo> mailFromUserinfo;
        TMaybe<NJson::TJsonValue> mailFromJsonUserinfo;
        if (const auto* val = jsonValue.GetValueByPath("mailfrom_userinfo")) {
            mailFromJsonUserinfo = *val;
            // mailFrom is copied here on purpose: it is still needed for the TAnswer itself below.
            mailFromUserinfo = TUserInfo::Parse(mailFrom, *val);
        }

        TMaybe<NJson::TJsonValue> activityInfo;
        if (const auto* val = jsonValue.GetValueByPath("activity")) {
            activityInfo = *val;
        }

        NJson::TJsonValue senderMlFeatures = jsonValue["sender_ml_features"];
        NJson::TJsonValue senderMlEmbeddings = jsonValue["sender_ml_embeddings"];

        TVector<TString> yaDiskInfos;
        if (const auto* it = jsonValue.GetValueByPath("yadisk_info")) {
            const NJson::TJsonValue::TArray& array = it->GetArraySafe();
            for (const NJson::TJsonValue& value : array) {
                if (value.IsString()) {
                    yaDiskInfos.emplace_back(value.GetString());
                }
            }
        }

        // The last argument is a plain copy: `jsonValue` is a const reference, so the former
        // std::move() around it could never move and only hid the copy.
        return TAnswer(std::move(queueId),
                       std::move(docs),
                       std::move(senders),
                       std::move(hbfProjectId),
                       std::move(userInfos),
                       std::move(mailDssmEmbedding),
                       std::move(dkimDomains),
                       std::move(dkimStats),
                       std::move(matchedTemplate),
                       std::move(dssmDistance),
                       std::move(dssmNeighbourId),
                       std::move(dssmNeighbourLabels),
                       std::move(dssm2Distance),
                       std::move(dssm2NeighbourId),
                       std::move(dssm2NeighbourLabels),
                       std::move(dssmSubjectDistance),
                       std::move(dssmSubjectNeighbourId),
                       std::move(dssmSubjectNeighbourLabels),
                       std::move(hasCryptaUserVector),
                       std::move(mailFrom),
                       std::move(mailFromUserinfo),
                       std::move(mailFromJsonUserinfo),
                       allFromSameOrgId,
                       std::move(recipientsUserinfos),
                       std::move(activityInfo),
                       std::move(senderMlFeatures),
                       std::move(senderMlEmbeddings),
                       std::move(yaDiskInfos),
                       jsonValue);
    }

    // Returns the document part considered "visible", or nullptr when there is none.
    //
    // A part qualifies when it has no "attachsize_b" and its content type is text/html or
    // text/plain, and its hid does not start with "<hid>." of any message/rfc822 part seen earlier
    // in the list. The last qualifying part wins. A found part is cached in visiblePart; a
    // negative result is not cached, so the scan is repeated on the next call.
    const TDoc *TAnswer::FindVisiblePart() const {
        if (visiblePart.Defined()) {
            return *visiblePart;
        }

        TVector<TString> rfc822Prefixes;
        const TDoc *lastMatch = nullptr;

        for (const auto &part : docs) {
            const bool isInlineText = !part.hasAttachsize
                                      && (part.contentType == "text/html" || part.contentType == "text/plain");
            if (!isInlineText) {
                // Remember embedded messages so that their sub-parts can be recognized by hid prefix.
                if (part.contentType == "message/rfc822") {
                    rfc822Prefixes.push_back(TString::Join(part.hid, '.'));
                }
                continue;
            }

            bool insideNestedMessage = false;
            for (const auto &prefix : rfc822Prefixes) {
                if (part.hid.StartsWith(prefix)) {
                    insideNestedMessage = true;
                    break;
                }
            }

            if (!insideNestedMessage) {
                lastMatch = &part;
            }
        }

        if (lastMatch) {
            visiblePart = lastMatch;
        }
        return visiblePart.GetOrElse(nullptr);
    }

    // Returns the visible part (see FindVisiblePart) only when its content type is exactly
    // "text/html"; otherwise nullptr.
    const TDoc *TAnswer::FindTrueHtmlPart() const {
        const TDoc *visible = FindVisiblePart();
        const bool isHtml = visible && visible->contentType == "text/html";
        return isHtml ? visible : nullptr;
    }

    // Read-only access to the parsed document parts, in the order they were given to the constructor.
    const TVector<TDoc> &TAnswer::Docs() const {
        return docs;
    }

    // Returns a pointer to the first sender whose Uid equals `uid`, or nullptr when none matches.
    // The pointer refers into the Senders container owned by this object.
    const TSender *TAnswer::FindSenderByUid(const TUid &uid) const {
        const auto hasUid = [&uid](const TSender& candidate) {
            return candidate.Uid == uid;
        };
        const auto found = FindIf(Senders, hasUid);
        if (found == Senders.cend()) {
            return nullptr;
        }
        return &*found;
    }

    // Returns the stored HBF project id (empty when "hbf_project_id" was absent or not a string at parse time).
    const TString &TAnswer::GetHbfProjectId() const {
        return HbfProjectId;
    }

    // Returns the stored mail DSSM embedding; undefined when "mail_dssm_embedding" was absent at parse time.
    const TMaybe<TVector<float>> &TAnswer::GetMailDssmEmbedding() const {
        return MailDssmEmbedding;
    }

    // Returns the stored DKIM domains (empty when "dkim_domains" was absent at parse time).
    const TVector<TString>& TAnswer::GetDkimDomains() const {
        return DkimDomains;
    }

    // Returns the raw "dkim_stats" JSON; undefined when the key was absent at parse time.
    const TMaybe<NJson::TJsonValue>& TAnswer::GetDkimStats() const {
        return DkimStats;
    }

    // Builds a TMatchedTemplate from the "matched_template" JSON object.
    // `jsonValue` must be a JSON map (GetMapSafe()). "stable_sign" (integer) and "delta" (array of
    // strings) are optional and read with the checked *Safe accessors. A copy of `jsonValue` is
    // kept in Source.
    TMatchedTemplate TMatchedTemplate::Parse(const NJson::TJsonValue& jsonValue) {
        const auto &fields = jsonValue.GetMapSafe();

        TMatchedTemplate parsed;

        if (const auto *sign = MapFindPtr(fields, "stable_sign")) {
            parsed.StableSign = sign->GetIntegerSafe();
        }

        if (const auto *deltaNode = MapFindPtr(fields, "delta")) {
            const auto &items = deltaNode->GetArraySafe();
            parsed.delta.reserve(items.size());
            for (const auto &item : items) {
                parsed.delta.push_back(item.GetStringSafe());
            }
        }

        parsed.Source = jsonValue;

        return parsed;
    }

    // Computes shingles for every delta token that is not wrapped in angle brackets.
    // Each token is stripped first; a stripped token that both starts with '<' and ends with '>'
    // is skipped. For every other token the result of
    // NText2Shingles::Text2Shingles(stripped, LANG_UNK, true) is appended, so the output keeps the
    // order of the remaining tokens.
    TDeque<TVector<TString>> TMatchedTemplate::CalculateHashes() const {
        TDeque<TVector<TString>> Hashes;
        for (const TString& token : delta) {
            // Strip once and reuse the result for both the bracket test and the shingle computation.
            const auto stripped = StripString(token);
            if (stripped.StartsWith('<') && stripped.EndsWith('>')) {
                continue;
            }
            Hashes.emplace_back(NText2Shingles::Text2Shingles(stripped, LANG_UNK, true));
        }
        return Hashes;
    }

     // Serializes `hashes` as a JSON array of arrays of strings (one inner array per entry),
     // using a writer configured with HEM_DONT_ESCAPE_HTML.
     TString TMatchedTemplate::HashesAsJson(TDeque<TVector<TString>> hashes) {
        NJsonWriter::TBuf writer(NJsonWriter::HEM_DONT_ESCAPE_HTML);

        writer.BeginList();
        for (const auto& group : hashes) {
            writer.BeginList();
            for (const auto& shingle : group) {
                writer.WriteString(shingle);
            }
            writer.EndList();
        }
        writer.EndList();

        return writer.Str();
    }
} // namespace NHtmlSanMisc
