#include "zone_extractor.h"
#include <util/string/strip.h>
#include <library/cpp/logger/global/global.h>

// Builds the per-zone state for one configured zone.
// The zone starts with an open-count (Started) of zero, i.e. not currently open,
// and keeps the supplied configuration item in Config.
TZoneExtractor::TZoneDescr::TZoneDescr(const TZoneExtractorConfigItem& config)
    : Started(0)
    , Config(config)
{}

// Creates an extractor that consults zoneAttrConf for zone boundaries and
// tracks one TZoneDescr per entry of config, keyed by the entry's key.
// An entry whose key is already present in ZonesDescr is not inserted again.
TZoneExtractor::TZoneExtractor(IZoneAttrConf& zoneAttrConf, const TZoneExtractorConfig& config)
    : DocProperties(CreateParsedDocProperties())
    , ZoneAttrConf(zoneAttrConf)
{
    for (const auto& configEntry : config) {
        ZonesDescr.insert(std::make_pair(configEntry.first, TZoneDescr(configEntry.second)));
    }
}

void TZoneExtractor::OnMarkup(const THtmlChunk& evnt)
{
    TZoneEntry result;
    ZoneAttrConf.CheckEvent(evnt, DocProperties.Get(), &result);
    if (evnt.GetLexType() == HTLEX_START_TAG) {
        OnText(evnt, true);
        if (result.Name && *result.Name && result.IsOpen)
            OnBeginZone(evnt, result);
        OnOpenTag(evnt);
    }
    if (evnt.GetLexType() == HTLEX_END_TAG) {
        OnCloseTag(evnt);
        OnText(evnt, true);
    }
}

void TZoneExtractor::OnOpenTag(const THtmlChunk& evnt)
{
    TTagsInfo::iterator tag = TagsInfo.find(evnt.Tag->lowerName);
    if (tag != TagsInfo.end())
        ++tag->second.Level;
}

void TZoneExtractor::OnCloseTag(const THtmlChunk& evnt)
{
    TTagsInfo::iterator tag = TagsInfo.find(evnt.Tag->lowerName);
    if (tag != TagsInfo.end()) {
        TTagInfo& tagInfo = tag->second;
        --tagInfo.Level;
        TTagCloseWaiters::iterator level = tagInfo.CloseWaiters.find(tagInfo.Level);
        if (level != tagInfo.CloseWaiters.end()) {
            for (TZoneDescrSet::iterator waiter = level->second.begin(); waiter != level->second.end(); ++waiter)
                OnEndZone(evnt, *waiter);
            tagInfo.CloseWaiters.erase(level);
            if (tagInfo.CloseWaiters.empty())
                TagsInfo.erase(tag);
        }
    }
}

void TZoneExtractor::OnBeginZone(const THtmlChunk& evnt, const TZoneEntry& zoneEntry)
{
    TZonesDescription::iterator iter = ZonesDescr.find(zoneEntry.Name);
    if (iter != ZonesDescr.end()) {
        if (!iter->second.Started && iter->second.Config.JoinParagraphs && (!iter->second.Config.JoinSpans || iter->second.Content.empty()))
            iter->second.Content.push_back(TString());

        ++iter->second.Started;
        TTagInfo& tagInfo = TagsInfo[evnt.Tag->lowerName];
        tagInfo.CloseWaiters[tagInfo.Level].insert(&iter->second);
    }
}

void TZoneExtractor::OnEndZone(const THtmlChunk&, TZoneExtractor::TZoneDescr* descr)
{
    --descr->Started;
    VERIFY_WITH_LOG(descr->Started >= 0, "zone %s end more times than start", (descr->Config.Name).data());
}

void TZoneExtractor::OnText(const THtmlChunk& evnt, bool tag)
{
    for (TZonesDescription::iterator it = ZonesDescr.begin(); it != ZonesDescr.end(); ++it) {
        if (it->second.Started) {
            TString str(evnt.text, evnt.leng);
            if (it->second.Config.StripTags) {
                if (tag)
                    continue;
                StripInPlace(str);
            } else if (it->second.Config.Strip) {
                StripInPlace(str);
            }

            if (it->second.Config.JoinParagraphs) {
                Y_VERIFY(!it->second.Content.empty(), "zone must be open");
                it->second.Content.back().append(str);
            }  else {
                it->second.Content.push_back(str);
            }
        }
    }
}

// Entry point for parsed chunks. Chunks of type PARSED_MARKUP whose markup is
// not MARKUP_IGNORED go to OnMarkup; everything else goes to OnText, flagged
// as a tag when its lex type is not HTLEX_TEXT. Always returns nullptr.
THtmlChunk* TZoneExtractor::OnHtmlChunk(const THtmlChunk& chunk) {
    const bool isActiveMarkup =
        chunk.flags.type == PARSED_MARKUP && chunk.flags.markup != MARKUP_IGNORED;

    if (!isActiveMarkup) {
        OnText(chunk, chunk.GetLexType() != HTLEX_TEXT);
        return nullptr;
    }

    OnMarkup(chunk);
    return nullptr;
}
