#include "prep_normalizer.h"

#include <saas/rtyserver/components/fullarchive/disk_manager.h>

#include <robot/jupiter/library/rtdoc/file/writer.h>
#include <robot/jupiter/library/rtdoc/file/model/files.h>
#include <robot/jupiter/library/rtdoc/protos/lumps.pb.h>
#include <saas/rtyserver/indexer_core/index_dir.h>

#include <util/folder/path.h>

// Private implementation of TPrepNormalizer.
// Holds the set of lump names that TPrepNormalizer::Fix leaves out when it rebuilds
// the prep file of a directory carrying the DIRPREFIX_INDEX prefix.
class TPrepNormalizer::TImpl final {
private:
    THashSet<TString> Filter = {
        "attrs_keyinv", "calculated_attrs_with_keyinv_doc_chunk_mapping_single_chunk", "erf_doc_lumps", "erf_global_lumps",
        "ext_info_arc", "host_attrs", "host_attrs_doc_lumps", "host_attrs_global_lumps", "key_inv_sentence_lengths_doc_lumps",
        "linkann_data_doc_lumps", "linkann_sentence_lengths_doc_lumps", "walrus", "walrus_with_keyinv_doc_chunk_mapping_single_chunk"};

public:
    TImpl() = default;

    // Returns the lump names to exclude.
    // Returned by const reference: the previous `const THashSet<TString>` by-value return
    // copy-constructed a temporary on every call, and the const qualifier prevented moving from it.
    // The reference stays valid for as long as this TImpl is alive.
    const THashSet<TString>& GetFilter() const {
        return Filter;
    }
};

TPrepNormalizer::TPrepNormalizer(const TRTYServerConfig& config)
    : NRTYServer::INormalizer(config)
    , Impl(MakeHolder<TImpl>())
{
}

// Defined in this translation unit, where TImpl is a complete type, so that Impl can be destroyed.
TPrepNormalizer::~TPrepNormalizer() = default;

// Returns the fixed name of this normalizer.
const char* TPrepNormalizer::Name() const {
    static constexpr char NormalizerName[] = "PrepNormalizer";
    return NormalizerName;
}

// Reports whether the prep files of the index directory need no rebuild.
// Returns true only when the main prep file and its index file both exist and there is no
// "<prep file>.rebuild" marker next to them. The indexFrq argument is not used.
bool TPrepNormalizer::AllRight(const NRTYServer::TNormalizerContext& context, const THolder<TFileMap>& /*indexFrq*/) const {
    const TFsPath indexDir = context.Dir.PathName();

    // A rebuild is requested either by removing prep.lumps.dir (leaving prep.lumps in place)
    // or by creating an empty 'prep.lumps.rebuild' file.
    NRtDoc::TPrepTableBase prepTable(indexDir / NRtDoc::TBuilderFiles::MainPrepFile);

    const TFsPath dataFile = prepTable.GetFilePath();
    const TFsPath indexFile = prepTable.GetFileIndexPath();
    const TFsPath rebuildMarker = TFsPath(prepTable.GetFilePath() + ".rebuild");

    if (!dataFile.Exists()) {
        return false;
    }
    if (!indexFile.Exists()) {
        return false;
    }
    return !rebuildMarker.Exists();
}

// Rebuilds the main prep file of the index directory from the full archive.
// Does nothing when AllRight() reports that the prep files are already in place.
// Requires the full archive manager to be configured and opened (checked via the Assert* helpers).
// For directories carrying the DIRPREFIX_INDEX prefix, the lumps named by TImpl::GetFilter() are
// left out; for every other directory all lumps are written.
// On completion the "<prep file>.rebuild" marker is removed if present.
void TPrepNormalizer::Fix(const NRTYServer::TNormalizerContext& context, const THolder<TFileMap>& indexFrq) const {
    if (AllRight(context, indexFrq)) {
        return;
    }

    const auto fullArchive = context.Managers.GetManager<TDiskFAManager>(FULL_ARCHIVE_COMPONENT_NAME);
    const TFsPath indexDir = context.Dir.PathName();
    AssertCorrectConfig(fullArchive, "FullArchive is required for TPrepNormalizer");
    AssertCorrectIndex(fullArchive->IsOpened(), "FullArchive must be opened for TPrepNormalizer");

    WARNING_LOG << "Restoring Prep indexes in " << indexDir.Basename() << Endl;

    // Names of lumps to skip. Left empty for non-final indexes, which keep all PrepIds.
    THashSet<TString> skippedLumps;
    if (NRTYServer::HasIndexDirPrefix(indexDir, DIRPREFIX_INDEX)) {
        skippedLumps = Impl->GetFilter();
    }
    const bool keepEverything = skippedLumps.empty();

    // Upper bound of the docids to visit.
    // NOTE(review): presumably indexFrq stores two bytes per document — confirm.
    const ui32 docIdLimit = indexFrq->GetFile().GetLength() / 2;

    // Drop whatever is left of the previous prep table before writing a new one.
    NRtDoc::TPrepTableBase(indexDir / NRtDoc::TBuilderFiles::MainPrepFile).DeleteIfExists();

    NRtDoc::TPrepWriter writer(indexDir / NRtDoc::TBuilderFiles::MainPrepFile);

    TBuffer serialized;
    {
        // The sampler lives only for the duration of the scan and is released before Finish().
        auto sampler = fullArchive->CreateSampler();
        for (ui32 candidate = 0; candidate < docIdLimit; ++candidate) {
            sampler->Seek(candidate);
            if (!sampler->IsValid()) {
                continue;
            }

            const ui32 docId = sampler->GetDocId();
            if (fullArchive->IsRemoved(docId)) {
                continue;
            }

            const NRTYServer::TParsedDoc& parsed = sampler->GetParsedDoc();
            if (!(parsed.HasDocument() && parsed.GetDocument().HasIndexedDoc())) {
                continue;
            }

            const NRealTime::TIndexedDoc& indexedDoc = parsed.GetDocument().GetIndexedDoc();

            // Copy every lump that is not filtered out into the output message.
            NJupiter::TMercuryLumps keptLumps;
            for (ui32 lumpIdx = 0; lumpIdx < indexedDoc.WadLumpsSize(); ++lumpIdx) {
                const NRealTime::TIndexedDoc::TMapType& source = indexedDoc.GetWadLumps(lumpIdx);
                const TString& lumpName = source.GetName();
                if (keepEverything || !skippedLumps.count(lumpName)) {
                    NJupiter::TMapType* target = keptLumps.AddLumps();
                    target->SetName(lumpName);
                    target->SetData(source.GetData());
                }
            }

            serialized.Resize(keptLumps.ByteSize());
            CHECK_WITH_LOG(keptLumps.SerializeToArray(serialized.Data(), serialized.Size()));
            writer.AddData(docId, TBlob::FromBufferSingleThreaded(serialized));
        }
    }

    writer.Finish();

    // The rebuild request, if it was made via the marker file, has now been served.
    TFsPath(writer.GetFilePath() + ".rebuild").DeleteIfExists();

    WARNING_LOG << "Restored Prep indexes in " << indexDir.Basename() << Endl;
}
