#include "rty_index_text_data.h"

#include <saas/rtyserver/components/zones_makeup/makeup_storage_interface.h>

#include <library/cpp/charset/recyr.hh>
#include <kernel/tarc/iface/dtiterate.h>
#include <kernel/tarc/markup_zones/text_markup.h>

#include <util/stream/str.h>


namespace {
    /// TGetArcTitleHandler: implements "class THandler" from dtiterate.h
    ///
    /// @details There are two kinds of "titles", we support both:
    /// - static (EArchiveZone:AZ_TITLE - see TitleStaticZone)
    /// - dynamic ("id" is always a power of 2, needs to be configured in RTYServer);
    ///
    class TGetArcTitleHandler {
    private:
        TArchiveZone TitleStaticZone;
        const ISentenceZonesReader* TitleDynamicZone;
        TSentenceZones DynamicZonesMask;
        ui32 TitleSentNum;
        ui32 DocId;

        TStringStream Title;

    public:
        bool OnHeader(const TArchiveTextHeader*) {
            return true;
        }

        bool OnBeginExtendedBlock(const TArchiveTextBlockInfo&) {
            return true;
        }

        bool OnEndExtendedBlock() {
            return true;
        }

        bool OnBeginBlock(ui16, const TArchiveTextBlockInfo&) {
            return true;
        }

        bool OnEndBlock() {
            return true;
        }

        bool OnWeightZones(TMemoryInput*) {
            return true;
        }

        void OnEnd() {
        }

    private:
        bool IsStaticZoneEnabled() const {
            return DynamicZonesMask == TSentenceZones() && TitleSentNum == Max<ui32>();
        }

        Y_FORCE_INLINE bool IsDynamicZoneEnabled() const {
            return DynamicZonesMask != TSentenceZones();
        }

    public:
        TGetArcTitleHandler(const ISentenceZonesReader* sentReader, TSentenceZones zonesMask, ui32 titleSentNum, ui32 docId)
            : TitleDynamicZone(sentReader)
            , DynamicZonesMask(zonesMask)
            , TitleSentNum(titleSentNum)
            , DocId(docId)
        {
            Y_VERIFY(sentReader || !DynamicZonesMask);
        }

        bool OnMarkupInfo(const void* markupInfo, size_t markupInfoLen) {
            if (IsStaticZoneEnabled()) {
                TArchiveMarkupZones arcZones;
                UnpackMarkupZones(markupInfo, markupInfoLen, &arcZones);
                TitleStaticZone = arcZones.GetZone(AZ_TITLE);
                // we except that the Title starts at the beginning of a sentence (it is always this way)
                Y_ASSERT(TitleStaticZone.Spans.empty() || TitleStaticZone.Spans[0].OffsetBeg == 0);
            }
            return true;
        }

        bool IsTitle(const size_t sentNum) const {
            if (TitleSentNum != Max<ui32>() && sentNum <= TitleSentNum) {
                return true;
            }
            for (const TArchiveZoneSpan& span : TitleStaticZone.Spans) {
                if (sentNum >= span.SentBeg && sentNum <= span.SentEnd) {
                    return true;
                }
            }
            if (IsDynamicZoneEnabled()) {
                const TSentenceZones zones = TitleDynamicZone->GetSentZones(DocId, sentNum);
                return (zones & DynamicZonesMask) != 0;
            }
            return false;
        }

        static void OnTitlePart(TStringStream& result, const TString& sentence) {
            if (!result.Empty()) {
                result << ' ';
            }
            result << sentence;
        }

        bool OnSent(size_t sentNum, ui16 sentFlag, const void* sentBytes, size_t sentBytesLen) {
            // Unfortunately, we have to recode strings here
            // see also: TUnpacker::TImpl in ysite/yandex/snippets/archive/unpacker.cpp
            if (IsTitle(sentNum)) {
                TString recodedStr;
                if (sentFlag & SENT_HAS_EXTSYMBOLS) {
                    const TWtringBuf buf(static_cast<const TWtringBuf::char_type*>(sentBytes), sentBytesLen / sizeof(TWtringBuf::char_type));
                    recodedStr = WideToUTF8(buf);
                } else {
                    const TStringBuf buf(static_cast<const TStringBuf::char_type*>(sentBytes), sentBytesLen);
                    const bool recoded = Recode(CODES_YANDEX, CODES_UTF8, buf, recodedStr);
                    Y_ENSURE(recoded);
                }
                OnTitlePart(Title, recodedStr);
            }
            return true;
        }

    public:
        TString&& Detach() {
            return std::move(Title.Str());
        }
    };
}

namespace NRTYServer {
    /// Builds the title of a document from its archive text.
    ///
    /// Runs IterateArchiveDocText over @p blob with a TGetArcTitleHandler and returns
    /// whatever text the handler has accumulated.
    ///
    /// @param blob         archive data of the document, must not be null
    /// @param docId        document id passed to the handler
    /// @param sentReader   reader of dynamic sentence zones; may be null only if @p zonesMask is empty
    /// @param zonesMask    mask of the dynamic zones passed to the handler
    /// @param titleNumSent sentence-number limit passed to the handler
    TString GetArcTitle(const ui8* blob, ui32 docId, const ISentenceZonesReader* sentReader, TSentenceZones zonesMask, const ui32 titleNumSent) {
        Y_ASSERT(blob);
        Y_ASSERT(sentReader || !zonesMask);

        TGetArcTitleHandler titleCollector(sentReader, zonesMask, titleNumSent, docId);
        IterateArchiveDocText(blob, titleCollector);

        TString title = titleCollector.Detach();
        return title;
    }
}


/// Returns the mask of the zones that have the Title role in the makeup zones description.
/// Without a makeup (Data.Makeup is not set) a default-constructed TSentenceZones is returned.
TSentenceZones TRTYIndexTextData::GetTitleZonesMask() const {
    if (Data.Makeup) {
        return Data.Makeup->GetZonesDescription()->GetZonesMask(NZonesMakeup::EZoneRole::Title);
    }

    return TSentenceZones();
}

