#include <butil/StrUtils/StrUtils.h>
#include <butil/StrUtils/Encoding.h>
#include <mail_getter/UTFizer.h>

#include <internal/config.h>
#include <internal/trim.h>
#include <internal/phishing.h>
#include <internal/sanitizer.h>
#include <internal/fact_extractor/async_fact_extractor.h>
#include <internal/transformer_attributes.h>
#include <internal/event_xml.h>
#include <internal/message_context.h>
#include <internal/sanitize_transform.h>
#include <internal/transformers/text_transformer.h>
#include <internal/nested_tags_checker.h>
#include <fstream>

namespace msg_body {

/// Returns the current local time formatted with the strftime pattern
/// "[%d/%b/%Y:%X]" (for example "[05/Mar/2024:13:37:00]" in the C locale).
///
/// An empty string is returned when the local time cannot be obtained or when
/// the formatted text does not fit into the internal 100-byte buffer.
///
/// NOTE: std::localtime uses a shared static buffer and is not thread-safe.
std::string getCurrentTimeDate(){
    const std::time_t now = std::time(nullptr);

    // std::localtime may return nullptr; handing that to strftime is undefined behaviour.
    const std::tm* const localNow = std::localtime(&now);
    if (localNow == nullptr) {
        return std::string();
    }

    // Format into a genuine 100-byte buffer. The earlier std::string(" ", 100)
    // picked the (const char*, count) constructor and therefore read 100 bytes
    // out of a 2-byte string literal.
    char buffer[100];
    const std::size_t written = std::strftime(buffer, sizeof(buffer), "[%d/%b/%Y:%X]", localNow);

    // strftime returns 0 when the result does not fit, which yields an empty string here.
    return std::string(buffer, written);
}

/// Wires a TextTransformer to its collaborators.
///
/// Every argument except `vdirect` initializes the member of the same name.
/// `vdirect` is only forwarded, together with `config` and `attrs`, to the
/// constructor of the `contentTransformers_` member.
TextTransformer::TextTransformer(
        const Configuration& config,
        TransformerAttributes& attrs,
        MessageContext& messageContext,
        LogPtr logger,
        const IContentTypeDetector& contentTypeDetector,
        const AliasClassList& aliasClassList,
        unsigned trimThreshold,
        VdirectPtr vdirect,
        const Sanitizer& sanitizer,
        const InlineSpoofer& inlineSpoofer,
        const Recognizer::Wrapper& recognizer,
        const TextAsyncFactExtractor& asyncFactExtractor)
    : config_(config),
      attrs_(attrs),
      messageContext_(messageContext),
      logger_(logger),
      contentTypeDetector_(contentTypeDetector),
      aliasClassList_(aliasClassList),
      trimThreshold_(trimThreshold),
      contentTransformers_(config, attrs, vdirect),
      sanitizer_(sanitizer),
      inlineSpoofer_(inlineSpoofer),
      // The member shares its name with the parameter (no trailing underscore):
      // the name outside the parentheses is the member, the one inside is the argument.
      recognizer(recognizer),
      asyncFactExtractor_(asyncFactExtractor)
{
}

/// Decodes `content` in place and reports the language detected in it.
///
/// The text is first decoded according to `hs.encoding()`, then passed to
/// utfizeStringDetectLang() with `attrs.useCharset` when that is non-empty and
/// with `hs.charset()` otherwise, and finally run through
/// TStrUtils::replaceBadSymbols().
///
/// @return the result of `recognizer.isoNameByLanguageCode()` for the detected language.
std::string decodeTextDetectLang(std::string& content, const TransformerAttributes& attrs, MetaPart& hs,
                                 const Recognizer::Wrapper& recognizer) {
    const PaLog timer(__FUNCTION__);

    Encoding::decode(hs.encoding(), content);

    // A charset supplied in the attributes takes precedence over the one from the part header.
    const bool charsetOverridden = !attrs.useCharset.empty();
    const std::string& effectiveCharset = charsetOverridden ? attrs.useCharset : hs.charset();
    const Recognizer::LangsEnum detectedLang = utfizeStringDetectLang(recognizer, content, effectiveCharset);

    TStrUtils::replaceBadSymbols(content);

    const auto isoName = recognizer.isoNameByLanguageCode(detectedLang);
    timer.write();
    return isoName;
}

bool TextTransformer::checkForPhishing(const std::string& content, const std::string& stid) const {
    const PaLog paLog(__FUNCTION__);
    std::string pattern;
    const bool isPhishing = PhishingSingleton().instance().check(content, pattern);
    if (isPhishing) {
        LOGDOG_(makePhishingLogger(), notice, log::stid=stid, log::pattern=pattern);
    }
    paLog.write();
    return isPhishing;
}

/// Checks the tag nesting of a complete HTML part.
///
/// Parts that are already incomplete, or whose content subtype is not "html",
/// are left untouched. When the checker reports that the depth limit is
/// exceeded, the part is marked incomplete and `divLimitExceeded` is set.
/// A parse error is only logged at debug level.
void TextTransformer::checkNestedTags(MessagePart& part) const {
    if (!part.isComplete) {
        return;
    }
    const std::string& subtype = part.contentType.subtype();
    if (subtype != "html") {
        return;
    }

    NestedTagsChecker checker(config_, logger_);
    const auto verdict = checker.check(part.content);

    switch (verdict) {
        case NestedTagsCheckerResult::Ok:
            // nothing to adjust
            break;

        case NestedTagsCheckerResult::ParseError:
            MBODY_LOG_DEBUG(logger_, log::where_name="tags_check", log::message="html content parse failed", log::stid=part.stid);
            break;

        case NestedTagsCheckerResult::TagsDepthLimitExceeded:
            part.isComplete = false;
            part.divLimitExceeded = true;
            break;
    }
}

/// Runs the text pipeline on `part` in place and fills the corresponding fields of `res`.
///
/// Steps, in order:
///   1. set `part.textType` from the content subtype ("plain" -> "text", "html" -> "html");
///   2. decode `part.content` and detect its language (`res.lang`);
///   3. make a working copy of the content and trim it unless `attrs_.flags.noTrim`;
///   4. extract facts now, unless `attrs_.flags.sequentialFactex` postpones that to step 9;
///   5. phishing check on the trimmed copy and nested-tags check on the part;
///   6. sanitize (real sanitizer, tykva, or nothing) and store the result in `part.content`;
///   7. apply the content transformers;
///   8. event-XML transform when `attrs_.flags.xmlStreamerOn`;
///   9. sequential fact extraction, if requested.
///
/// Besides `part` and `res`, `messageContext_.cids_` may be overwritten.
///
/// @throws rethrows the sanitizer's std::exception for HTML content when the
///         fallback strategy is not applicable.
void TextTransformer::transformTextContent(MessagePart& part, TextTransformerResult& res) const {
    const PaLog paLogTextContent(__FUNCTION__, attrs_.requestId);
    const std::string& subtype = part.contentType.subtype();
    // Other subtypes leave part.textType as it was.
    if (subtype == "plain") {
        part.textType = "text";
    } else if (subtype == "html") {
        part.textType = "html";
    }
    // Decodes part.content in place.
    res.lang = decodeTextDetectLang(part.content, attrs_, part.headerStruct, recognizer);

    // Trimming is applied to a copy; part.content keeps the untrimmed text until
    // one of the swaps below.
    std::string trimmedContent = part.content;
    res.isTrimmed = attrs_.flags.noTrim ? false : trimBigLetter(&trimmedContent, trimThreshold_);
    if (res.isTrimmed) {
        MBODY_LOG_WARN(logger_, log::where_name="trim", log::message="big letter was trimmed");
        part.isComplete = false;
    }

    // A strategy counts as applicable when either the request attributes or the
    // configuration select it.
    const auto sanitizerStrategyApplicable = [&](sanitizer::Strategy s) {
        return attrs_.sanitizerStrategy == s || config_.sanitizerStrategy == s;
    };

    FactExtractor::RequestPtr factExtractorRequest;
    if (!attrs_.flags.sequentialFactex) {
        // NOTE(review): extractFactsBegin is immediately followed by extractFactsEnd, so
        // no other work of this function runs between the two calls — confirm that the
        // End call was not meant to happen after sanitizing.
        const PaLog paLogFactExtractor("FactExtractor", attrs_.requestId);
        factExtractorRequest = asyncFactExtractor_.extractFactsBegin(part, messageContext_, attrs_);
        res.facts = asyncFactExtractor_.extractFactsEnd(factExtractorRequest, paLogFactExtractor);
    }

    // The check (and its logging) always runs; the flag is suppressed for our own messages.
    part.isPhishing = checkForPhishing(trimmedContent, part.stid)
            && !messageContext_.isOurMessage;

    // Runs before any swap below, i.e. not on the trimmed copy.
    checkNestedTags(part);

    // This function only passes paLogSanitizer to sanitizeEnd; it never calls write() on it itself.
    const PaLog paLogSanitizer("Sanitizer", attrs_.requestId);
    const bool isSanitizableHtml = (!trimmedContent.empty() && (subtype == "html"));
    const bool isSanitizableText = (!trimmedContent.empty() && (subtype == "plain") && config_.plainTextSanitize && !attrs_.flags.draft);
    if ((isSanitizableHtml || isSanitizableText) && !sanitizerStrategyApplicable(sanitizer::Strategy::tykva)) {
        // Real sanitizer path.
        try {
            mail_getter::SanitizerParsedResponse sanitizerRes = sanitizer_.sanitizeEnd(
                trimmedContent,
                attrs_.uid,
                attrs_.mid,
                part.stid,
                part.contentType.typeAndSubtype(),
                attrs_.from,
                attrs_.system,
                paLogSanitizer
            );
            MBODY_LOG_DEBUG(logger_, log::where_name="sanitizer_strategy", log::message="fetched from real sanitizer");
            if (attrs_.flags.xmlStreamerOn || attrs_.flags.draft) {
                SanitizeTransformParams stParams;
                // Cid parts are supplied for drafts and for the non-mobile streamer.
                if (!attrs_.flags.xmlStreamerMobile || attrs_.flags.draft) {
                    stParams.SetCidParts(messageContext_.cidParts);
                }
                SanitizeTransformResult stRes = sanitizeTransform(inlineSpoofer_, sanitizerRes, stParams);
                part.content.swap(stRes.content);
                messageContext_.cids_ = stRes.cids;
            } else {
                part.content.swap(sanitizerRes.html);
            }
            // Sanitized plain text is reported as html from here on.
            if (isSanitizableText) {
                part.textType = "html";
                part.contentType.setSubtype("html");
                res.typeInfo.contentType.setSubtype("html");
            }
        } catch (const std::exception& e) {
            MBODY_LOG_ERROR(logger_, log::where_name="sanitizer_strategy", log::exception=e);
            if (isSanitizableHtml) {
                if (sanitizerStrategyApplicable(sanitizer::Strategy::fallback)) {
                    MBODY_LOG_WARN(logger_, log::where_name="sanitizer_strategy", log::message="real sanitizer failed, falling back to tykva");
                    sanitizer::sanitizeByTykva(trimmedContent);
                    part.content.swap(trimmedContent);
                    part.afterTykva = true;
                } else {
                    MBODY_LOG_WARN(logger_, log::where_name="sanitizer_strategy", log::message="real sanitizer failed, no tykva fallback enabled");
                    // Propagates to the caller; the rest of this function, including
                    // paLogTextContent.write(), is skipped.
                    throw;
                }
            } else {
                // Plain text: keep the trimmed content as is.
                MBODY_LOG_WARN(logger_, log::where_name="sanitizer_strategy", log::message="real sanitizer failed, plain text sanitizing not applicable");
                part.content.swap(trimmedContent);
            }
        }
    } else if (isSanitizableHtml && sanitizerStrategyApplicable(sanitizer::Strategy::tykva)) {
        // Tykva-only strategy: applied to html only.
        MBODY_LOG_WARN(logger_, log::where_name="sanitizer_strategy", log::message="tykva only mode on");
        sanitizer::sanitizeByTykva(trimmedContent);
        part.content.swap(trimmedContent);
        part.afterTykva = true;
    } else {
        // Nothing to sanitize (empty content, other subtype, plain text not eligible,
        // or plain text under the tykva strategy): just take the trimmed copy.
        part.content.swap(trimmedContent);
    }

    res.contentTransformersResult = contentTransformers_.apply(part);

    if (attrs_.flags.xmlStreamerOn) {
        try {
            EventXmlResult eventXmlResult = eventXmlTransform(part.content, part.isPhishing, part.videoLinks);
            part.content.swap(eventXmlResult.content_);
            // Only the mobile streamer takes the cids from the event-XML result.
            if (attrs_.flags.xmlStreamerMobile) {
                messageContext_.cids_.swap(eventXmlResult.cids_);
            }
        } catch (const std::exception& e) {
            // A failed transform is not fatal: the content stays as it was and the
            // part is marked incomplete.
            MBODY_LOG_WARN(logger_, log::where_name="event_xml", log::message="exception while transforming xml", log::exception=e);
            part.isComplete = false;
        }
    }

    // Sequential mode: facts are extracted from the part after all transformations above.
    if (attrs_.flags.sequentialFactex) {
        const PaLog paLogFactExtractor("FactExtractor", attrs_.requestId);
        factExtractorRequest = asyncFactExtractor_.extractFactsBegin(part, messageContext_, attrs_);
        res.facts = asyncFactExtractor_.extractFactsEnd(factExtractorRequest, paLogFactExtractor);
    }
    paLogTextContent.write();
}

/// Transforms `part` in place and returns a result describing it.
///
/// `hid`, `isMain`, the type info and the raw-output flag are filled in before
/// transformTextContent() runs; `content`, `divLimitExceeded` and `afterTykva`
/// are copied from the part afterwards.
TextTransformerResult TextTransformer::apply(MessagePart& part) const {
    TextTransformerResult result;

    // Values captured before the transformation touches the part.
    result.hid = part.hid;
    result.isMain = part.isMain;
    result.typeInfo = getTypeInfo(part, contentTypeDetector_, aliasClassList_);
    result.isRaw = attrs_.flags.outputAsCDATA;

    transformTextContent(part, result);

    // Values produced by the transformation.
    result.content = part.content;
    result.divLimitExceeded = part.divLimitExceeded;
    result.afterTykva = part.afterTykva;

    return result;
}

}
