#include "rty_indexer.h"
#include "index_component.h"

#include <saas/rtyserver/config/common_indexers_config.h>
#include <saas/rtyserver/config/config.h>
#include <saas/rtyserver/config/grouping_config.h>
#include <saas/rtyserver/key_inv_processor/memory_portions.h>

#include <yweb/realtime/indexer/common/indexing/yndex_reader.h>
#include <yweb/realtime/indexer/common/indexing/onedoc_yndex_reader.h>
#include <yweb/realtime/indexer/common/indexing/index_common.h>
#include <yweb/realtime/indexer/common/indexing/index_reader.h>
#include <yweb/realtime/indexer/common/indexing/setdoc_index_reader.h>

#include <kernel/indexer/faceproc/docattrinserter.h>
#include <kernel/groupattrs/docsattrswriter.h>
#include <kernel/tarc/merge/builder.h>
#include <kernel/walrus/advmerger.h>

void TRTYIndexer::StoreIndexedDoc(const TIndexParsedEntity& docIndex) {
    VERIFY_WITH_LOG(docIndex.GetIndexedDoc(), "Incorrect usage TRTYIndexer::StoreIndexedDoc");

    const NRealTime::TIndexedDoc& indexedDoc = *docIndex.GetIndexedDoc();
    const TDocInfoEx& docInfo = docIndex.GetDocInfo();

    NRealTime::TRTOldOneDocIndexReader<NRealTime::TIndexedDoc_TEntry> indexReader(indexedDoc.GetEntries());
    NRealTime::TSetDocIndexReader docIndexReader(indexReader, docInfo.DocId);

    SUPERLONG* dBuffer = (SUPERLONG*)ReadBuffer.Data();

    for (; !indexReader.IsFinished(); indexReader.NextKey()) {
        NRealTime::NRTCodecs::TKeyStorage key;
        indexReader.LoadKey(key);
        if (key[0] == 0) {
            continue;
        }

        PositionsBuffer.Clear();
        SUPERLONG* dataEnd;
        while ((dataEnd = docIndexReader.LoadHits(dBuffer, dBuffer + 8192)) != dBuffer) {
            PositionsBuffer.Append((const char*)dBuffer, (const char*)dataEnd);
        }
        for (auto& storage: indexReader.IsAttrKey() ? StoragesAttr : StoragesLemm) {
            storage->StorePositions(key, (SUPERLONG*)PositionsBuffer.Data(), PositionsBuffer.Size() / sizeof(SUPERLONG));
        }
    }

    TFullDocAttrs fda;
    TIndexedDocStorage::FillAttributesToExtAttrs(fda, indexedDoc);
    GroupProcessor->CommitDoc(nullptr, docInfo.DocId, fda);

    if (ExternalIndexers.SentenceLengthsWriter) {
        TString lens = indexedDoc.GetSentencesLength();
        lens.prepend('\0');
        ExternalIndexers.SentenceLengthsWriter->Add(docInfo.DocId, reinterpret_cast<const ui8*>(lens.data()), lens.size());
    }

    if (indexedDoc.HasArchive()) {
        const TString& archive = indexedDoc.GetArchive();
        TArchiveHeader* ah = (TArchiveHeader*)(archive.data());
        ah->DocId = docInfo.DocId;
        ExternalIndexers.TextArchiveWriter->IndexDoc(docInfo.DocId, TBlob::NoCopy(archive.data(), archive.size()), Thread);
    }
}

void TRTYIndexer::BuildExtAttrs(const TDocInfoEx& docInfo, TFullDocAttrs& docAttrs) const {
    if (RTYConfig.GetCommonIndexers().StoreDateAttrsMsk && docInfo.ModTime) {
        constexpr time_t tzOffsetMsk = 3 * 3600;
        TDocAttrInserter(&docAttrs).StoreDateTimeAttr("date", docInfo.ModTime + tzOffsetMsk);
    }
}

void TRTYIndexer::BuildIndexedDoc(const TIndexParsedEntity& docIndex) {
    VERIFY_WITH_LOG(!docIndex.GetIndexedDoc(), "Incorrect usage TRTYIndexer::BuildIndexedDoc");
    const TDocInfoEx& docInfo = docIndex.GetDocInfo();
    TAtomicSharedPtr<NRealTime::TIndexedDoc> indexedDoc = MakeAtomicShared<NRealTime::TIndexedDoc>();

    ParsedDocStorage->SetCustomConfig(docIndex.GetCustomParserConfig().data());
    ParsedDocStorage->SetDocsAttrs(docIndex.MutableExtAttrs());
    BuildExtAttrs(docInfo, docIndex.MutableExtAttrs());
    IndexedDocStorage->IndexDoc(docInfo, docIndex.MutableExtAttrs(), indexedDoc.Get(), docInfo.FullUrl, false);

    docIndex.SetIndexedDoc(indexedDoc);
}

void TRTYIndexer::IndexDoc(const TParsedDocument& document) {
    const TIndexParsedEntity* docIndex = document.GetComponentEntity<TIndexParsedEntity>(INDEX_COMPONENT_NAME);
    VERIFY_WITH_LOG(docIndex, "there is no Index in doc %s", document.GetDocSearchInfo().GetUrl().data())
    try {
        if (!docIndex->GetIndexedDoc())
            BuildIndexedDoc(*docIndex);
        StoreIndexedDoc(*docIndex);
    } catch(...) {
        const TDocInfoEx& docInfo = docIndex->GetDocInfo();
        ERROR_LOG << "IndexDoc failed: " << CurrentExceptionMessage() << " Url=" << docInfo.FullUrl << " DocText=" << TStringBuf(docInfo.DocText, docInfo.DocSize) << Endl;
        throw;
    }
}

// Accounts for one more document in the current portion and remembers its id.
// When the portion reaches MaxPortionDocs documents, a line
// "<Directory>/<Prefix><NumPortion>p\n" is written to PlsFile, the portion
// number is advanced and the per-portion counter is reset.
void TRTYIndexer::IncDoc(ui32 docId) {
    ++Counter;
    MemoryPortionAttr->IncDoc();
    MemoryPortionBody->IncDoc();
    DocIds.push_back(docId);

    if (Counter != MaxPortionDocs) {
        return;
    }

    const TString portionLine(TString::Join(Directory, '/', Prefix, ToString(NumPortion), "p\n"));
    PlsFile->Write(portionLine.data(), portionLine.size());
    ++NumPortion;
    Counter = 0;
}

// Adds a storage that receives positions of attribute keys in StoreIndexedDoc.
void TRTYIndexer::RegisterStorageAttr(IYndexStorage* storage) {
    StoragesAttr.emplace_back(storage);
}

// Adds a storage that receives positions of non-attribute (lemma) keys in StoreIndexedDoc.
void TRTYIndexer::RegisterStorageLemm(IYndexStorage* storage) {
    StoragesLemm.emplace_back(storage);
}

// Remembers an inv-creator callback; Start() attaches all remembered ones to IndexedDocStorage.
void TRTYIndexer::RegisterInvCreator(NIndexerCore::IDirectTextCallback4* callback) {
    InvCreators.emplace_back(callback);
}

// Remembers a direct-text callback; Start() attaches all remembered ones to IndexedDocStorage.
void TRTYIndexer::RegisterDTCallback(NIndexerCore::IDirectTextCallback2* callback) {
    DTCallbacks.emplace_back(callback);
}

// Creates an indexer bound to one indexing thread.
//
// config           - supplies the temporary directory (TempDir) and file prefix (TempPrefix).
// baseConfig       - base indexer configuration; it is copied into BaseConfig and the copy
//                    is adjusted below, the caller's object is not modified.
// thread           - thread number; used to build the per-thread file prefix "<thread>_".
// ownerConfig      - server configuration kept as RTYConfig.
// externalIndexers - external writers/creators kept as ExternalIndexers.
//
// The indexer is created in the stopped state (IsWorking == false); the temporary
// directory must already exist.
TRTYIndexer::TRTYIndexer(const TDirConf& config, const TBaseConfig& baseConfig, int thread, const TRTYServerConfig& ownerConfig, const TExternalIndexers& externalIndexers)
    : RTYConfig(ownerConfig)
    , ExternalIndexers(externalIndexers)
    , BaseConfig(baseConfig)
    , Thread(thread)
    , IsWorking(false)
    // Both buffers are sized for 8192 SUPERLONG hits; StoreIndexedDoc reads hits
    // through ReadBuffer in chunks of that size.
    , PositionsBuffer(sizeof(SUPERLONG) * 8192)
    , ReadBuffer(sizeof(SUPERLONG) * 8192)
{
    Prefix = ToString(thread) + "_";

    Directory = config.TempDir;
    Counter = NumPortion = 0;

    // Keep the configured document count as the portion size, then override the
    // values in the local BaseConfig copy that is passed to the components below.
    MaxPortionDocs = BaseConfig.DocCount;
    BaseConfig.DocCount = 1;
    BaseConfig.MaxMemory = 0;

    VERIFY_WITH_LOG(NFs::Exists(Directory), "Incorrect directory path %s", Directory.data());

    // Indexaa is set only for the construction of the group processor and is
    // cleared again before the indexed-doc storage config is created from BaseConfig.
    BaseConfig.Indexaa = config.TempPrefix + Prefix + "aa";
    BaseConfig.IndexaaVersion = RTYConfig.GetCommonIndexers().GroupingConfig->GetVersion();
    GroupProcessor.Reset(new TGroupProcessor(&BaseConfig, ExternalIndexers.MetaInfoCreator));
    BaseConfig.Indexaa = "";
    IndexedDocStorageConfig.Reset(new TIndexedDocStorageConfig(BaseConfig));
    IndexedDocStorageConfig->UseDater = false;
    IndexedDocStorageConfig->IsRtYServer = true;
    IndexedDocStorageConfig->UseSentenceLengthProcessor = true;
    IndexedDocStorageConfig->UseArchive = RTYConfig.GetCommonIndexers().StoreTextToArchive;
    IndexedDocStorageConfig->StripKeysOnIndex = true;

    // Memory portions for attribute keys ("a") and lemma keys ("l").
    MemoryPortionAttr.Reset(new TMemoryPortionAttr(IYndexStorage::PORTION_FORMAT_ATTR, MaxPortionDocs, Prefix, "a", config.TempDir));
    MemoryPortionBody.Reset(new TMemoryPortionBody(IYndexStorage::PORTION_FORMAT_LEMM, MaxPortionDocs, Prefix, "l", config.TempDir));

    // The memory portions are always the first registered storages of their kind.
    StoragesAttr.push_back(MemoryPortionAttr.Get());
    StoragesLemm.push_back(MemoryPortionBody.Get());
}

void TRTYIndexer::Stop() {
    VERIFY_WITH_LOG(IsWorking, "Incorrect TRTYIndexer stop method usage");
    ui32 portion = NumPortion;
    DEBUG_LOG << "status=rty_indexer_stop;action=start;prefix=" << Prefix << ";portion=" << NumPortion << ";dir=" << Directory << Endl;
    if (Counter > 0) {
        const TString portionPrefix(TString::Join(Directory, '/', Prefix, ToString(NumPortion), "p\n"));
        PlsFile->Write(portionPrefix.data(), portionPrefix.size());
        NumPortion++;
        Counter = 0;
    }
    if (!NFs::Exists(Directory))
        ERROR_LOG << "status=rty_indexer_stop;info=directory_is_absent;dir=" << Directory << Endl;
    DEBUG_LOG << "status=rty_indexer_stop;action=ad_save;prefix=" << Prefix << ";portion=" << portion << ";dir=" << Directory << Endl;
    ::Save(&*PlsAdFile, DocIds);
    DEBUG_LOG << "status=rty_indexer_stop;action=pls_ad_flushing;prefix=" << Prefix << ";portion=" << portion << ";dir=" << Directory << Endl;
    PlsAdFile->Flush();
    DEBUG_LOG << "status=rty_indexer_stop;action=pls_flushing;prefix=" << Prefix << ";portion=" << portion << ";dir=" << Directory << Endl;
    PlsFile->Flush();
    DEBUG_LOG << "status=rty_indexer_stop;action=finished;prefix=" << Prefix << ";portion=" << portion << ";dir=" << Directory << Endl;
    IsWorking = false;
}

namespace {
    class TNGrammBuilderCallback: public NIndexerCore::IDirectTextCallback2 {
    private:
        const TString ZoneName;
        const ui8 NBase;
    public:

        TNGrammBuilderCallback(const TString& zoneName, ui8 nBase)
            : ZoneName(zoneName)
            , NBase(nBase)
        {

        }

        void SetCurrentDoc(const TDocInfoEx& /*docInfo*/) override {
        }

        void ProcessDirectText2(IDocumentDataInserter* inserter, const NIndexerCore::TDirectTextData2& directText, ui32 /*docId*/) override {
            for (ui32 i = 0; i < directText.ZoneCount; ++i) {
                if (directText.Zones[i].Zone == ZoneName) {
                    for (ui32 ent = 0; ent < directText.EntryCount; ++ent) {
                        TWtringBuf buf = directText.Entries[ent].Token;
                        if (buf.size() > NBase) {
                            for (ui32 c = 0; c <= buf.size() - NBase; ++c) {
                                TUtf16String token(buf.data(), c, NBase);
                                inserter->StoreLemma(token.data(), token.size(), token.data(), token.size(), 0, directText.Entries[ent].Posting, LANG_UNK);
                            }
                        }
                    }
                }
            }
        }

        void Finish() override {

        }
    };
}

void TRTYIndexer::Start() {

    DEBUG_LOG << "status=rty_indexer_start;prefix=" << Prefix << ";portion=" << NumPortion << ";dir=" << Directory << Endl;
    TIndexedDocStorageConstructionData idscd(*IndexedDocStorageConfig, AttrProcessorConfig);
    ParsedDocStorage = new TRTYParsedDocStorage(BaseConfig, RTYConfig.GetCommonIndexers().ZonesToProperties, RTYConfig.GetCommonIndexers().BackwardCompatibleTokenization);
    idscd.CustomParsedDocStorage.Reset(ParsedDocStorage);
    idscd.EventsProcessor = this;
    IndexedDocStorage.Reset(new TIndexedDocStorage(idscd));

    for (auto&& i : InvCreators) {
        IndexedDocStorage->AddInvCreator(i);
    }

    for (auto&& i : RTYConfig.GetCommonIndexers().NGrammZonesVector) {
        NGrammsCallbacks.push_back(new TNGrammBuilderCallback(i.first, i.second.NBase));
        IndexedDocStorage->AddDTCallback(NGrammsCallbacks.back().Get());
    }

    for (auto&& i : DTCallbacks) {
        IndexedDocStorage->AddDTCallback(i);
    }

    PlsAdFile.Reset(new TFixedBufferFileOutput(Directory + "/tmp" + Prefix + "plsad"));
    PlsFile.Reset(new TFixedBufferFileOutput(Directory + "/tmp" + Prefix + "pls"));
    IsWorking = true;
    DEBUG_LOG << "status=rty_indexer_start_ok;prefix=" << Prefix << ";portion=" << NumPortion << ";dir=" << Directory << Endl;
}

// Tears the indexer down. The indexer must not be in the working state
// (IsWorking == false); destroying a working indexer is a fatal usage error.
TRTYIndexer::~TRTYIndexer() {
    DEBUG_LOG << "status=rty_indexer_before_destruction;prefix=" << Prefix << ";portion=" << NumPortion << ";dir=" << Directory << Endl;
    VERIFY_WITH_LOG(!IsWorking, "Incorrect TRTYIndexer usage");
    // The group processor is terminated and destroyed explicitly, before the
    // memory portions are closed.
    GroupProcessor->Term();
    GroupProcessor.Destroy();
    MemoryPortionBody->Close();
    MemoryPortionAttr->Close();
    DEBUG_LOG << "status=rty_indexer_after_destruction;prefix=" << Prefix << ";portion=" << NumPortion << ";dir=" << Directory << Endl;
}
