#include "parsed_doc_storage.h"
#include <saas/rtyserver/components/indexer/indexer/zone_extractor.h>
#include <ysite/parser/xml/xml_parse.h>
#include <kernel/indexer/face/docinfo.h>
#include <library/cpp/numerator/numerate.h>
#include <library/cpp/html/zoneconf/attrextractor.h>

// Constructs the storage on top of TParsedDocStorage, remembering the zone
// extractor configuration and the tokenization compatibility flag.
// The default XML parser configuration is selected as the current one, and
// no document attribute sink is attached yet (see SetDocsAttrs()).
TRTYParsedDocStorage::TRTYParsedDocStorage(const TBaseConfig& cfg, const TZoneExtractorConfig& zoneExtCfg, bool backwardCompatible)
    : TParsedDocStorage(cfg)
    , ZoneExtCfg(zoneExtCfg)
    , CurrentXmlConf(&DefaultXmlConf)
    , DocsAttrs(nullptr)
    , BackwardCompatibleTokenization(backwardCompatible)
{
    // Set up the default XML parser configuration from the base config.
    const auto defaultXmlConfSource = cfg.XmlParserConf.data();
    DefaultXmlConf.Configure(defaultXmlConfSource);
}

// Parses a document whose format is not handled elsewhere in the hierarchy.
// Only MIME_XML documents are processed here: the document text is parsed
// with the currently selected XML configuration into Storage. Any other MIME
// type is ignored.
void TRTYParsedDocStorage::DoParseUnknownFormat(IParsedDocProperties* docProps, const TDocInfoEx* docInfo) {
    const MimeTypes docMime = static_cast<MimeTypes>(docInfo->DocHeader->MimeType);
    if (docMime != MIME_XML) {
        return;
    }

    TMemoryInput xmlStream(docInfo->DocText, docInfo->DocSize);
    Storage.SetPeerMode(NHtml::TStorage::CopyText);
    NHtml::TParserResult parseSink(Storage);
    ParseWholeXmlDocument(*CurrentXmlConf, &xmlStream, docProps, &parseSink);
}

// Numerates the previously parsed content of Storage for MIME_XML documents,
// feeding events to `handler`. The numerator is configured with the
// backward-compatible tokenization flag given at construction time.
// Other MIME types are ignored.
void TRTYParsedDocStorage::DoNumerateUnknownFormat(MimeTypes mimeType, INumeratorHandler& handler, IParsedDocProperties* docProps) {
    if (mimeType != MIME_XML) {
        return;
    }

    Numerate(
        handler,
        Storage.Begin(),
        Storage.End(),
        docProps,
        CurrentXmlConf,
        Numerator::TConfig(BackwardCompatibleTokenization));
}

// Selects which XML configuration is used for subsequent XML documents.
// A non-null, non-empty `conf` is loaded into the custom configuration, which
// then becomes current; a null or empty `conf` switches back to the default.
void TRTYParsedDocStorage::SetCustomConfig(const char* conf) {
    const bool noCustomConf = !conf || !*conf;
    if (noCustomConf) {
        CurrentXmlConf = &DefaultXmlConf;
        return;
    }

    CustomXmlConf.ConfigureFromMemory(conf);
    CurrentXmlConf = &CustomXmlConf;
}

// Attaches the attribute container that OnAfterParseDoc() writes extracted
// zone properties into. Only the address is stored, so `docsAttrs` must stay
// alive for as long as this storage may use it.
void TRTYParsedDocStorage::SetDocsAttrs(TFullDocAttrs& docsAttrs) {
    this->DocsAttrs = &docsAttrs;
}

// Zone extractor that can export the collected zone contents as document
// attributes (properties).
class TZoneToProp : public TZoneExtractor {
public:
    TZoneToProp(IZoneAttrConf& zoneAttrConf, const TZoneExtractorConfig& config)
        : TZoneExtractor(zoneAttrConf, config)
    {
    }

    // For every zone that has a property name configured, adds each collected
    // content value to `docAttr` under that name with type AttrArcText.
    // Zones without a property name are skipped.
    void WriteProps(TFullDocAttrs& docAttr) const {
        for (const auto& entry : ZonesDescr) {
            const auto& zone = entry.second;
            if (!zone.Config.PropName) {
                continue;
            }
            for (const auto& value : zone.Content) {
                docAttr.AddAttr(zone.Config.PropName, value, TFullDocAttrs::AttrArcText);
            }
        }
    }
};

// Post-parse hook: extracts configured zones from the parsed document and
// stores their contents as document attributes in the container attached via
// SetDocsAttrs().
//
// Does nothing when the zone extractor configuration is empty or when no
// attribute container has been attached.
void TRTYParsedDocStorage::OnAfterParseDoc(IParsedDocProperties* /*docProps*/, const TDocInfoEx* docInfo) {
    if (ZoneExtCfg.empty())
        return;
    // DocsAttrs is nullptr until SetDocsAttrs() is called. The extracted zones
    // are only ever written there, so without a destination there is nothing
    // useful to do, and dereferencing the pointer would be undefined behavior.
    if (DocsAttrs == nullptr)
        return;

    IZoneAttrConf* zoneAttrConf = nullptr;
    // Keeps the HTML attribute extractor alive until the end of this call
    // when it is the one backing zoneAttrConf.
    THolder<TAttributeExtractor> attrExtractor;
    if (docInfo->DocHeader->MimeType == MIME_XML) {
        // XML documents use the currently selected XML configuration.
        zoneAttrConf = CurrentXmlConf;
    } else {
        // Other documents use an attribute extractor built from the HTML configuration.
        attrExtractor.Reset(new TAttributeExtractor(GetHtConf()));
        zoneAttrConf = attrExtractor.Get();
    }

    TZoneToProp ext(*zoneAttrConf, ZoneExtCfg);
    DoSomething(&ext);
    ext.WriteProps(*DocsAttrs);
}
