#include "yt_indexing_mapper.h"
#include "fullarc_toolkit.h"

#include <saas/rtyserver/config/const.h>
#include <saas/library/yt/file/yt_file_dumper.h>
#include <saas/library/yt/stream/yt_chunked_output.h>

#include <util/folder/filelist.h>
#include <util/folder/path.h>
#include <util/system/fs.h>
#include <util/generic/guid.h>
#include <util/generic/ptr.h>
#include <util/stream/file.h>
#include <util/string/vector.h>

// Passes the mapper class to the REGISTER_MAPPER macro (defined outside this file; presumably it makes
// the class known to the YT job runtime so it can be instantiated by name -- confirm against the macro).
REGISTER_MAPPER(TSaasYTIndexingMapper);

// Forward declaration of a file-local helper; the definition follows TSaasYTIndexingMapper::Do below.
// It takes the index directory, the dumper for header files and an optional dumper for data files.
static void DumpIndexSegment(const TFsPath& indexDir, const TYTFileDumper& headerDumper, const TMaybe<TYTFileDumper>& dataDumper);

/*
 * This is an ordered map over the ShardId+SegmentId value, but some jobs will fall on the border (or even on
 * several borders) between segments. In that case we need to produce multiple index parts, one for each segment.
 * We also assume that the job input is always small enough to fit into TmpFS.
 * This is a reasonable assumption because both the job input limits and the TmpFS size are configured in the
 * job spec, so it is a question of job spec consistency.
 */
// Processes the input as consecutive runs of rows that share one (ShardId, SegmentId) pair.
// Each run is indexed by its own TStandaloneIndexer; after the indexer is destroyed, the directory it
// reported is dumped into `output` and then deleted.
void TSaasYTIndexingMapper::Do(TReader* input, TWriter* output) {
    // One iteration of the outer loop handles exactly one (shard, segment) run.
    while (input->IsValid()) {
        const TShardId shardId = input->GetRow().GetShardId();
        const ui32 segmentId = input->GetRow().GetSegmentId();

        THolder<TStandaloneIndexer> segmentIndexer = MakeHolder<TStandaloneIndexer>(ConfigBundle.Text, ConfigBundle.Variables, RunModules);
        segmentIndexer->EnableIndexing();

        // Feed documents until the input ends or a row from another shard/segment shows up.
        while (input->IsValid()) {
            const auto& row = input->GetRow();
            const bool segmentChanged = row.GetShardId() != shardId || row.GetSegmentId() != segmentId;
            if (segmentChanged) {
                // Leave the row unread: the next outer iteration starts a new run with it.
                break;
            }
            if (!FailOnIndexError) {
                segmentIndexer->Index(row.GetDocument());
            } else {
                segmentIndexer->IndexUnsafe(row.GetDocument());
            }
            input->Next();
        }

        // Remember the directory first, then destroy the indexer before its files are read
        // (presumably destruction finalizes the index on disk -- confirm against TStandaloneIndexer).
        const TFsPath segmentDir = segmentIndexer->GetIndexerDir();
        segmentIndexer.Destroy();

        // Both dumpers are created with the same freshly generated GUID. They differ only in the third
        // context field: 0 for headers, 1 for data (presumably the output table index -- confirm).
        TString partGuid = CreateGuidAsString();
        TMaybe<TYTFileDumper> dataPartDumper;
        TYTFileDumper headerPartDumper(partGuid, TYTChunkedOutput::TConstructionContext({output, shardId, 0, Nothing(), DumperChunkSize}));
        if (UseDatalessParts) {
            dataPartDumper.ConstructInPlace(partGuid, TYTChunkedOutput::TConstructionContext({output, shardId, 1, Nothing(), DumperChunkSize}));
        }
        DumpIndexSegment(segmentDir, headerPartDumper, dataPartDumper);

        // The dumped directory is no longer needed; remove it before handling the next run.
        segmentDir.ForceDelete();
    }
}

static void DumpIndexSegment(const TFsPath& indexDir, const TYTFileDumper& headerDumper, const TMaybe<TYTFileDumper>& dataDumper) {
    TFileList fl;
    fl.Fill(indexDir, true);
    TVector<TString> dataFiles;
    TVector<TString> headerFiles;
    while (TString fileName = fl.Next()) {
        if (dataDumper.Defined() && IsFullArcPartFile(fileName)) {
            dataFiles.push_back(fileName);
        } else {
            headerFiles.push_back(fileName);
        }
    }
    if (dataDumper.Defined()) {
        dataDumper->DumpFileList(indexDir, dataFiles);
    }
    headerDumper.DumpFileList(indexDir, headerFiles);
}
