#include "builder.h"
#include "const.h"
#include "config.h"
#include "manager.h"
#include "parsed_entity.h"

#include <saas/rtyserver/components/ann/arraydata/writer.h>
#include <saas/rtyserver/components/ann/storage/writer.h>
#include <saas/rtyserver/components/ann/storage/accessor.h>
#include <saas/rtyserver/config/indexer_config.h>
#include <saas/rtyserver/config/searcher_config.h>
#include <saas/rtyserver/config/realm_config.h>
#include <saas/rtyserver/factors/factors_config.h>
#include <saas/rtyserver/indexer_core/indexer_core.h>
#include <saas/rtyserver/components/search/sentence_lengths/writer.h>
#include <saas/rtyserver/key_inv_processor/ki_actor.h>

#include <kernel/indexann/writer/writer.h>
#include <ysite/directtext/sentences_lens/sentenceslensproc.h>

#include <library/cpp/logger/global/global.h>

using namespace NRTYAnn;

TAnnIndexBuilder::TAnnIndexBuilder(const TDirConf& dirConfig, const TRTYServerConfig& rtyConfig, const TString& componentName)
    : NRTYServer::IIndexComponentBuilder(componentName)
    , Config(rtyConfig.ComponentsConfig.Get<TAnnComponentConfig>(componentName))
    , UseLanguageFromConf(Config->RecognizerDataPath == "NOTSET")
    , AnnConfig(MakeHolder<NIndexAnn::TWriterConfig>(UseLanguageFromConf ? Default<TString>() : Config->RecognizerDataPath))
{
    CHECK_WITH_LOG(Config);
    AnnConfig->InvIndexDocCount = rtyConfig.GetRealmListConfig().GetMainRealmConfig().GetIndexerConfigDisk().MaxDocuments;
    AnnConfig->InvIndexMem = 0;
    for (const auto& dest : rtyConfig.GetSearcherConfig().Factors->GetAnnStreams()) {
        Writers[dest.first] = MakeHolder<TAnnWriter>(dirConfig, *Config, rtyConfig, *AnnConfig, dest.first);
    }
}

// No manual cleanup is performed here; only the members' own destructors run.
TAnnIndexBuilder::~TAnnIndexBuilder() = default;

// Closes every per-stream writer and rebuilds the list of annotation headers in the
// index metadata located at `context.SrcDir`: one header per writer, carrying the
// stream name and the configured data format. Always returns true.
bool TAnnIndexBuilder::DoClose(const NRTYServer::TBuilderCloseContext& context) {
    TIndexMetadataProcessor metadata(context.SrcDir.PathName());
    // Drop previously recorded headers so the list matches the current writers.
    metadata->ClearAnnHeaders();

    // The data format is the same for all streams of this component.
    const TString dataFormat = ToString(Config->DataType);

    for (auto& [streamName, streamWriter] : Writers) {
        streamWriter->Close(context);

        NRTYServer::TIndexMetadata::TAnnotationHeader* annHeader = metadata->AddAnnHeaders();
        annHeader->SetName(streamName);
        annHeader->SetDataFormat(dataFormat);
    }

    return true;
}

void TAnnIndexBuilder::Index(int threadId, const TParsedDocument& document, const ui32 docId) {
    const TAnnParsedEntity* entity = document.GetComponentEntity<TAnnParsedEntity>(NRTYServer::AnnComponentName);
    CHECK_WITH_LOG(entity);
    for (auto& writer : Writers)
        writer.second->StartDoc(threadId, docId, document.GetDocSearchInfo().GetKeyPrefix());
    for (const auto& i : entity->GetSentences()) {
        NIndexAnn::TSentenceParams sentP;
        sentP.TextLanguage = i.GetTextLanguage();
        if (UseLanguageFromConf && !sentP.TextLanguage.Defined()) {
            sentP.TextLanguage = Config->DefaultLanguage;
        }

        for (const TString& dest : i.GetDestinations())
            Writers[dest]->StartSentence(threadId, i.GetSentence(), sentP);

        const TMap<ui32, TAnnParsedEntity::TStreamsStorage>& regions = i.GetRegions();
        for (const auto& region : regions) {
            for (const auto& stream : region.second) {
                Writers[stream.Destination]->AddData(threadId, region.first, stream.Index, stream.Data);
            }
        }
    }
    for (auto& writer : Writers)
        writer.second->FinishDoc(threadId, docId);
}


// Prepares the writer for the annotation stream set `streamsIndex`.
//
// All files of this stream set share the prefix `dirConfig.NewPrefix + streamsIndex`.
// The documents storage is opened on that prefix, and one NIndexAnn::TWriter is created
// per disk indexer thread, writing to "<Prefix>_<thread>." with its own data writer.
TAnnIndexBuilder::TAnnWriter::TAnnWriter(const TDirConf& dirConfig,
                                         const TAnnComponentConfig& componentConfig,
                                         const TRTYServerConfig& globalConfig,
                                         const NIndexAnn::TWriterConfig& annWriterConfig,
                                         const TString& streamsIndex)
    : ComponentConfig(componentConfig)
    , Prefix(dirConfig.NewPrefix + streamsIndex)
    , DocumentsStorage(TArchiveOwner::Create(Prefix, componentConfig.MultipartConfig, 0))
{
    const auto threadCount = globalConfig.GetRealmListConfig().GetMainRealmConfig().GetIndexerConfigDisk().Threads;

    for (ui32 threadIdx = 0; threadIdx < threadCount; ++threadIdx) {
        auto dataWriter = CreateAnnDataWriter(*globalConfig.GetSearcherConfig().Factors->GetAnnStreams(streamsIndex));
        const TString threadPrefix = Prefix + "_" + ToString(threadIdx) + ".";
        AnnWriters.push_back(new NIndexAnn::TWriter(threadPrefix, dirConfig.TempDir, annWriterConfig, dataWriter));
    }
}

// Finalizes the stream set: drops the per-thread writers, merges their "<Prefix>_<i>."
// indexes into the single "<Prefix>." index (applying `context.RemapTable` when present),
// removes the per-thread .key/.inv files and releases the documents storage.
void TAnnIndexBuilder::TAnnWriter::Close(const NRTYServer::TBuilderCloseContext& context) {
    const ui32 partsCount = AnnWriters.size();
    // Common stem of the files produced by per-thread writer number `part`.
    const auto partStem = [this](ui32 part) {
        return Prefix + "_" + ToString(part);
    };

    TAdvancedMergeTaskWriter mergeTask;

    for (ui32 part = 0; part < partsCount; ++part) {
        // Drop the writer first, then register its output as a merge input.
        AnnWriters[part].Drop();
        mergeTask.AddInput(partStem(part) + ".", IYndexStorage::FINAL_FORMAT);
    }

    mergeTask.AddOutput(Prefix + ".");

    if (context.RemapTable) {
        DocumentsStorage->Remap(*context.RemapTable);

        auto& docRemap = *context.RemapTable;
        auto& finalRemap = mergeTask.GetTask()->FinalRemapTable;
        finalRemap.Create(1, 0);
        for (ui32 docId = 0; docId < docRemap.size(); ++docId) {
            if (docRemap[docId] == REMAP_NOWHERE) {
                finalRemap.SetDeletedDoc(0, docId);
            } else {
                finalRemap.SetNewDocId(0, docId, docRemap[docId]);
            }
        }
    }

    TAdvancedIndexMerger merger(mergeTask.GetTask());
    VERIFY_WITH_LOG(merger.Run("sent"), "Incorrect advanced merger behavior");

    // The per-thread parts are no longer needed once merged.
    for (ui32 part = 0; part < partsCount; ++part) {
        const TString stem = partStem(part);
        NFs::Remove(stem + ".key");
        NFs::Remove(stem + ".inv");
    }

    DocumentsStorage.Drop();
}

// Begins document `docId` (with `keyPrefix`) on the per-thread writer at index `threadId`.
void TAnnIndexBuilder::TAnnWriter::StartDoc(ui32 threadId, ui32 docId, ui64 keyPrefix) {
    auto& threadWriter = *AnnWriters[threadId];
    threadWriter.StartDoc(docId, keyPrefix);
}

// Finishes the current document on the per-thread writer at index `threadId`.
// The document id argument is accepted but not used.
void TAnnIndexBuilder::TAnnWriter::FinishDoc(ui32 threadId, ui32 /*docId*/) {
    auto& threadWriter = *AnnWriters[threadId];
    threadWriter.FinishDoc(true);
}

// Passes the bytes of `data` for (`region`, `streamId`) to the per-thread writer at
// index `threadId`. The bytes are handed over as a non-owning view of `data`.
void TAnnIndexBuilder::TAnnWriter::AddData(ui32 threadId, ui32 region, ui32 streamId, const TString& data) {
    const TArrayRef<const char> payload(data.data(), data.size());
    auto& threadWriter = *AnnWriters[threadId];
    threadWriter.AddData(region, streamId, payload);
}

// Starts a new sentence with parameters `sentP` on the per-thread writer at index `threadId`.
void TAnnIndexBuilder::TAnnWriter::StartSentence(ui32 threadId, const TUtf16String& sentence, const NIndexAnn::TSentenceParams& sentP) {
    auto& threadWriter = *AnnWriters[threadId];
    threadWriter.StartSentence(sentence, sentP);
}

// Builds the document data writer matching the configured data format, backed by
// DocumentsStorage. `streamsInfo` is used only by the COMPTRIE format.
// Aborts if the configured format is not one of the handled values.
TAtomicSharedPtr<NIndexAnn::IDocDataWriter> TAnnIndexBuilder::TAnnWriter::CreateAnnDataWriter(
    const NRTYFactors::TAnnFactorsList& streamsInfo
) const {
    switch (ComponentConfig.DataType) {
        case NRTYAnn::EDataFormat::COMPTRIE: {
            auto* accessor = new TAnnDataAccessor(streamsInfo);
            return MakeAtomicShared<TStreamDataWriter>(DocumentsStorage, accessor);
        }
        case NRTYAnn::EDataFormat::PLAIN_ARRAY: {
            return NewArrayDocDataWriter(DocumentsStorage);
        }
    }
    // Reached only for a DataType value outside the enum's handled cases.
    Y_VERIFY(false);
}
