#include "ru_locale.h"
#include "mail_threads.h"
#include "base_subject/base_subject.h"
#include "fnv_hash.h"
#include "exception.h"

#include <butil/StrUtils/utf8.h>

#include <util/string/cast.h>

#include <algorithm>
#include <iterator>
#include <locale>
#include <utility>

namespace NMthr {

namespace {

// Lower-cases a character sequence and writes the result through `out`.
//
// Every element yielded by make_utf8_ro_iterator over [begin, end) is passed
// to std::tolower<wchar_t> with RU_UTF8_LOCALE, and the result is stored via
// make_utf8_wo_iterator(out).
// NOTE(review): the ro/wo adaptors presumably decode/encode UTF-8 code
// points -- confirm against butil/StrUtils/utf8.h.
template <class InputIterator, class OutputIterator>
inline void ToLowerUTF8(InputIterator begin, InputIterator end, OutputIterator out) {
    const auto lowerCase = [](auto symbol) {
        return std::tolower<wchar_t>(symbol, RU_UTF8_LOCALE);
    };

    std::transform(
        make_utf8_ro_iterator(begin, end),
        make_utf8_ro_iterator(end, end),
        make_utf8_wo_iterator(out),
        lowerCase);
}

} // namespace anonymous


// Returns CalcOracleFnv1HashFromUTF8 of `buf` after it has been lower-cased
// with ToLowerUTF8. An empty `buf` yields the hash of an empty string.
ui64 CalcHashFromLowerUTF8(const TStringBuf& buf) {
    TString lowerBuf;
    // Capacity hint only: the lower-cased text is built one element at a time
    // through back_inserter, so reserving the input size up front avoids
    // repeated regrowth. The string still grows if the result ends up longer.
    lowerBuf.reserve(buf.size());
    ToLowerUTF8(buf.begin(), buf.end(), std::back_inserter(lowerBuf));
    return CalcOracleFnv1HashFromUTF8(lowerBuf);
}

void TMailThreadInfo::SetReferenceHashes(const TVector<TString>& msgIds) {
    ReferenceHashes.clear();
    std::transform(msgIds.begin(), msgIds.end(), std::back_inserter(ReferenceHashes),
        CalcHashFromLowerUTF8);
}

namespace {

// Maps mergeRule.Field to a merge policy:
//   "references"              -> EMergePolicy::References
//   "force_new"               -> EMergePolicy::ForceNew
//   "base_subject", "domain"  -> EMergePolicy::Hash
// Throws TMthrException for any other field value.
EMergePolicy DeduceMergePolicySafe(const TMergeRule& mergeRule) {
    const auto& field = mergeRule.Field;

    if (field == "references") {
        return EMergePolicy::References;
    }
    if (field == "force_new") {
        return EMergePolicy::ForceNew;
    }
    if (field == "base_subject" || field == "domain") {
        return EMergePolicy::Hash;
    }

    ythrow TMthrException() << "Unable to deduce merge policy, field: " << field;
}

// Selects the hash namespace for a rule: EHashNameSpace::From when the rule's
// field is "domain", EHashNameSpace::Subject for every other field value.
EHashNameSpace DeduceHashNameSpace(const TMergeRule& mergeRule) {
    const bool byDomain = (mergeRule.Field == "domain");
    return byDomain ? EHashNameSpace::From : EHashNameSpace::Subject;
}

} // namespace anonymous

// Looks up the merge rule for a message.
//
// The lookup key passed to mergeRules.find is the base subject of
// msgInfo.Subject (via FindBaseSubject), msgInfo.DomainLabel,
// msgInfo.MessageTypes and `experiments`.
// Returns a copy of the rule found; throws TMthrException when the lookup
// returns mergeRules.end().
TMergeRule FindMergeRuleSafe(
    const TMessageInfo& msgInfo,
    const TMergeRules& mergeRules,
    const TSet<std::string>& experiments)
{
    // Kept as a named local so it stays alive while the lookup result is used.
    auto subject = FindBaseSubject(msgInfo.Subject);
    auto matched = mergeRules.find(subject, msgInfo.DomainLabel, msgInfo.MessageTypes, experiments);

    const bool notFound = (matched == mergeRules.end());
    if (notFound) {
        ythrow TMthrException() << "Unable to find rule to merge";
    }
    return *matched;
}

// Builds the thread hash for a message under the given merge rule.
//
// NameSpace is taken from DeduceHashNameSpace(mergeRule). Value is then:
//   - Subject: CalcHashFromLowerUTF8 of the base subject of msgInfo.Subject;
//   - From:    CalcHashFromLowerUTF8 of msgInfo.HeaderFromDomain, but only
//              when that field is non-empty; otherwise Value keeps whatever
//              a default-constructed TMailThreadHash holds.
TMailThreadHash CalcMailThreadHash(
    const TMessageInfo& msgInfo,
    const TMergeRule& mergeRule)
{
    TMailThreadHash result;
    const EHashNameSpace nameSpace = DeduceHashNameSpace(mergeRule);
    result.NameSpace = nameSpace;

    if (nameSpace == EHashNameSpace::Subject) {
        result.Value = CalcHashFromLowerUTF8(FindBaseSubject(msgInfo.Subject));
    } else if (nameSpace == EHashNameSpace::From) {
        if (msgInfo.HeaderFromDomain) {
            result.Value = CalcHashFromLowerUTF8(msgInfo.HeaderFromDomain);
        }
    }

    return result;
}

// Assembles the TMailThreadInfo for a message under the given merge rule.
//
// The message-id list is msgInfo.References plus msgInfo.MessageId (when it
// is non-empty), passed through SortUnique. That list is stored in
// MessageIds, and its per-entry hashes in ReferenceHashes.
// The remaining fields come from the rule (MergePolicy, MergeRuleId, Limits)
// and from hashes of the message's own headers.
//
// Throws TMthrException (from DeduceMergePolicySafe) when the rule's field
// does not map to a merge policy.
TMailThreadInfo CalcMailThreadInfo(
    const TMessageInfo& msgInfo,
    const TMergeRule& mergeRule)
{
    TMailThreadInfo threadInfo;

    // Work on a copy: the caller's reference list must stay untouched.
    auto msgIds = msgInfo.References;
    if (!msgInfo.MessageId.Empty()) {
        msgIds.emplace_back(msgInfo.MessageId);
    }
    SortUnique(msgIds);

    threadInfo.SetReferenceHashes(msgIds);
    // Last use of the local list: hand it over instead of deep-copying it.
    threadInfo.MessageIds = std::move(msgIds);

    threadInfo.MergePolicy = DeduceMergePolicySafe(mergeRule);
    threadInfo.MergeRuleId = mergeRule.Id;
    threadInfo.Hash = CalcMailThreadHash(msgInfo, mergeRule);
    // Computed unconditionally, i.e. also for an empty MessageId, unlike
    // InReplyToHash below, which is only set for a non-empty InReplyTo.
    threadInfo.MessageIdHash = CalcHashFromLowerUTF8(msgInfo.MessageId);
    if (msgInfo.InReplyTo) {
        threadInfo.InReplyToHash = CalcHashFromLowerUTF8(msgInfo.InReplyTo);
    }
    threadInfo.Limits = TMailThreadLimits(mergeRule.RotateDays, mergeRule.RotateMails);
    return threadInfo;
}

} // namespace NMthr
