#include "pudge.h"
#include "log.h"

#include <robot/jupiter/library/pudge/config/load.h>
#include <robot/jupiter/library/pudge/structs/doc_chunk_mapping.h>
#include <robot/jupiter/library/pudge/structs/partition_doc_change.h>
#include <robot/jupiter/library/pudge/utility/doc_mappings_io.h>
#include <robot/jupiter/library/rtdoc/file/docidmap_io.h>
#include <robot/jupiter/library/rtdoc/file/model/docidmap.h>
#include <robot/jupiter/library/tables/shards_prepare.h>
#include <robot/jupiter/protos/arc.pb.h>
#include <robot/jupiter/protos/walrus.pb.h>

#include <robot/jupiter/tools/selectionrank/lib/pudge.h>

#include <util/system/fs.h>

namespace NFusion {

    // Shard identifier used in this file for the tier-config lookup, for the Shard field of
    // TArc rows and as the first argument of the TVarintBufferedWriter instances.
    const TString TPudgeData::ShardName = "Refresh/0-0";
    // Base name of the serialized TPudgeMapping file; always used as "<directory>/<MappingFileName>".
    const TString TPudgeData::MappingFileName = "pudge_mapping";

    namespace {
        // Loads a TDocIdMap through TDocIdMapIo::Load from the location reported by
        // input.GetSrcMapping(). Y_VERIFY-fails when the input has no source mapping set.
        NRtDoc::TDocIdMap LoadMergerMapping(const NRtDoc::TBuilderTask::TBuilderInput& input) {
            Y_VERIFY(input.GetSrcMapping());

            NRtDoc::TDocIdMap docIdMap;
            NRtDoc::TDocIdMapIo::Load(&docIdMap, input.GetSrcMapping());
            return docIdMap;
        }
    }

    // Loads the pudge config and returns a copy of the tier config stored for ShardName.
    // Y_VERIFY-fails if the config has no entry for the shard (or the entry is null).
    const NJupiter::TTierConfig TPudgeData::LoadTierConfig() {
        const auto pudgeConf = NJupiter::LoadPudgeConfig();

        const bool shardIsConfigured = pudgeConf.HasForShard(ShardName);
        auto tierConf = shardIsConfigured ? pudgeConf.GetForShard(ShardName) : nullptr;
        Y_VERIFY(tierConf);

        // Copy out: pudgeConf is a local and goes away on return.
        return *tierConf;
    }

    // Produces the pudge mapping for the output of a builder task.
    //
    // Steps, in order:
    //   1. load the tier config into Config;
    //   2. find the (at most one) input flagged as the previous final index;
    //   3. read that index's pudge mapping file, generating it first if it does not exist;
    //   4. create the doc list for the output and inherit chunk info from the previous mapping;
    //   5. select delta chunks and assign chunk/local ids;
    //   6. write the mapping file to the output directory and the mapping/delta-chunk tables.
    //
    // createClient is invoked with a directory and yields the IO client used for table access.
    void TPudgeData::GenerateNewMapping(const NRtDoc::TBuilderTask& task, const TCreateClient& createClient) {
        Config = LoadTierConfig();

        // Remember the previous final index; seeing a second one is a hard error.
        for (const auto& input : task.GetInputs()) {
            if (!input.GetIsFinalIndex()) {
                continue;
            }
            Y_VERIFY(!PrevFinal);
            PrevFinal = input;
        }

        TMaybe<TPudgeMapping> prevMapping;
        if (PrevFinal) {
            const auto& prevFinalDir = PrevFinal->GetSrcDir();
            const TString prevPudgeMappingFile = prevFinalDir + "/" + MappingFileName;

            // No mapping file next to the previous final index yet: generate it in place.
            if (!NFs::Exists(prevPudgeMappingFile)) {
                auto prevFinalClient = createClient(prevFinalDir);
                GeneratePrevFinalPudgeMapping(*prevFinalClient, *PrevFinal);
                Y_VERIFY(NFs::Exists(prevPudgeMappingFile));
            }

            prevMapping.ConstructInPlace();
            TFileInput in(prevPudgeMappingFile);
            Y_ENSURE(prevMapping->ParseFromArcadiaStream(&in));
        }

        // A previous mapping exists exactly when a previous final index exists.
        Y_VERIFY(!!PrevFinal == !!prevMapping);

        TVector<TDoc> docs = CreateDocs(task.GetOutput().GetDocCountStat());
        if (PrevFinal) {
            SetChunkInfoFromPrevMapping(*PrevFinal, *prevMapping, docs);
        }

        const auto pudgeDeltaChunkIndices = SelectDeltaChunks(docs);

        auto client = createClient(NRtDoc::IBuilderInputs(task).GetDeltaDir());

        const TVector<NJupiter::TPudgeDeltaChunk> pudgeDeltaChunks =
            AssignDocIds(*client, pudgeDeltaChunkIndices, docs);

        // Copy TWalrus.Arc from delta preps to portion in TArc format for NJupiter::BuildChunkedTextArc
        if (prevMapping) {
            ExtractWalrusArc(*client, docs);
        }

        WritePudgeMapping(docs, task.GetOutput().GetTrgDir());

        WriteDocChunkMappingTables(*client, docs);
        WriteDeltaChunkTable(*client, pudgeDeltaChunks);
    }

    // Parses "<dir>/<MappingFileName>" as a TPudgeMapping and converts every stored doc into an
    // NDoom::TDocChunkMapping built from (chunk, local id), preserving the stored order.
    // Y_ENSURE-fails if the file cannot be parsed.
    TVector<NDoom::TDocChunkMapping> TPudgeData::LoadDocChunkMapping(const TString& dir) {
        TPudgeMapping mapping;
        {
            TFileInput in(dir + "/" + MappingFileName);
            Y_ENSURE(mapping.ParseFromArcadiaStream(&in));
        }

        const auto& storedDocs = mapping.GetDocs();
        TVector<NDoom::TDocChunkMapping> result(Reserve(storedDocs.size()));
        for (const auto& stored : storedDocs) {
            result.emplace_back(stored.GetChunk(), stored.GetLocalId());
        }
        return result;
    }

    void TPudgeData::WritePudgeMapping(const TVector<TDoc>& docs, const TString& outputDir) const {
        TPudgeMapping mapping;
        auto& mappingDocs = *mapping.MutableDocs();
        for (const auto& doc : docs) {
            Y_VERIFY(doc.Chunk);
            Y_VERIFY(doc.LocalId);
            auto& mapping = *mappingDocs.Add();
            mapping.SetChunk(*doc.Chunk);
            mapping.SetLocalId(*doc.LocalId);
        }
        TFileOutput out(outputDir + "/" + MappingFileName);
        Y_ENSURE(mapping.SerializeToArcadiaStream(&out));
        out.Finish();
    }

    // Builds a pudge mapping for a previous final index that does not have one yet.
    // Every chunk index in [0, Config.GetChunks()) is offered to AssignDocIds as a delta chunk,
    // one doc is created per entry of the index's merger map, and the resulting mapping is
    // written into prevFinal.GetSrcDir(). The delta-chunk list returned by AssignDocIds is unused.
    void TPudgeData::GeneratePrevFinalPudgeMapping(NYT::IIOClient& client, const NRtDoc::TBuilderTask::TBuilderInput& prevFinal) {
        TVector<ui32> allChunkIndices(Config.GetChunks());
        for (ui32 chunkIdx = 0; chunkIdx < allChunkIndices.size(); ++chunkIdx) {
            allChunkIndices[chunkIdx] = chunkIdx;
        }

        const auto prevDocCount = LoadMergerMapping(prevFinal).Size();
        TVector<TDoc> docs = CreateDocs(prevDocCount);

        AssignDocIds(client, allChunkIndices, docs);
        WritePudgeMapping(docs, prevFinal.GetSrcDir());
    }

    // Carries chunk placement over from the previous final index.
    //
    // The merger map of prevFinal translates previous doc ids into new doc ids. For every
    // previous id that maps to a live new id, docs[newId] receives:
    //   - PrevDocId: the previous doc id;
    //   - Chunk / LocalId: the values stored for that previous id in prevMapping.
    // Previous docs mapped to TDocIdMap::DeletedDocument() are skipped.
    //
    // Y_VERIFY-fails if a new id falls outside docs or a previous id falls outside prevMapping.
    void TPudgeData::SetChunkInfoFromPrevMapping(const NRtDoc::TBuilderTask::TBuilderInput& prevFinal, const TPudgeMapping& prevMapping, TVector<TDoc>& docs) {
        NRtDoc::TDocIdMap prevToNewMap = LoadMergerMapping(prevFinal);
        const size_t prevMappingSize = static_cast<size_t>(prevMapping.GetDocs().size());

        for (ui32 prevId = 0; prevId < prevToNewMap.Size(); ++prevId) {
            const ui32 newId = prevToNewMap.Map(prevId);
            Y_VERIFY(prevId != NRtDoc::TDocIdMap::DeletedDocument());
            if (newId == NRtDoc::TDocIdMap::DeletedDocument()) {
                continue;
            }
            Y_VERIFY(newId < docs.size());
            // prevMapping is parsed from a file on disk; never index past its end.
            Y_VERIFY(prevId < prevMappingSize);

            const auto& prevDoc = prevMapping.GetDocs(prevId);
            auto& doc = docs[newId];
            doc.PrevDocId = prevId;
            doc.Chunk = prevDoc.GetChunk();
            doc.LocalId = prevDoc.GetLocalId();
        }
    }

    // Computes per-chunk statistics for the given docs; the result has Config.GetChunks() entries.
    //
    // Docs that have a PrevDocId contribute to their chunk:
    //   - MaxLocalIdPlusOne becomes the maximum of (LocalId + 1) over those docs;
    //   - NumAliveDocs is incremented once per doc.
    // Docs without a PrevDocId are only counted; their total is stored as NumAddedDocs of chunk 0.
    //
    // Chunk accesses are bounds-checked with at(), so an out-of-range chunk index or a config
    // with zero chunks raises an exception instead of touching memory outside the vector.
    TVector<NJupiter::TPudgeChunkStats> TPudgeData::FillChunkStats(const TVector<TDoc>& docs) const {
        TVector<NJupiter::TPudgeChunkStats> chunks(Config.GetChunks());
        ui64 numAddedDocs = 0;
        for (const auto& doc : docs) {
            if (doc.PrevDocId) {
                Y_VERIFY(doc.Chunk);
                auto& chunk = chunks.at(*doc.Chunk);
                chunk.SetMaxLocalIdPlusOne(Max(chunk.GetMaxLocalIdPlusOne(), *doc.LocalId + 1));
                chunk.SetNumAliveDocs(chunk.GetNumAliveDocs() + 1);
            } else {
                ++numAddedDocs;
            }
        }
        // The added-docs total is reported on chunk 0; at() guards against an empty chunk list.
        chunks.at(0).SetNumAddedDocs(numAddedDocs);
        return chunks;
    }

    // Copies rows from walrus table 0 into text-arc delta table 0 as TArc rows
    // (Shard = ShardName, DocId and Arc taken from the walrus row).
    //
    // Only docs without a PrevDocId are expected: the walrus rows are consumed in step with the
    // list of such doc ids, and each row's DocId must equal the next expected id (Y_VERIFY).
    // Reading stops once all expected ids were matched; running out of rows earlier is a
    // Y_VERIFY failure.
    void TPudgeData::ExtractWalrusArc(NYT::IIOClient& client, const TVector<TDoc>& docs) const {
        TVector<ui32> newDocIds;
        for (const auto& doc : docs) {
            if (doc.PrevDocId) {
                continue;
            }
            newDocIds.push_back(doc.DocId);
        }

        const auto tables = NJupiter::TShardsPrepareTables::Clientless();
        auto reader = client.NYT::IIOClient::CreateTableReader<NJupiter::TWalrus>(
            NJupiter::GetYtPathName(tables.GetWalrusTable(0))
        );
        auto writer = client.NYT::IIOClient::CreateTableWriter<NJupiter::TArc>(
            NJupiter::GetYtPathName(tables.GetTextArcDeltaTable(0))
        );

        auto newDocIt = newDocIds.begin();
        while (reader->IsValid() && newDocIt != newDocIds.end()) {
            const auto& row = reader->GetRow();
            Y_VERIFY(*newDocIt == row.GetDocId());
            ++newDocIt;

            NJupiter::TArc arcRow;
            arcRow.SetShard(ShardName);
            arcRow.SetDocId(row.GetDocId());
            arcRow.SetArc(row.GetArc());
            writer->AddRow(arcRow);

            reader->Next();
        }
        Y_VERIFY(newDocIt == newDocIds.end());
        writer->Finish();
    }

    // Writes one TDocChunkMapping entry (DocId, Chunk, LocalId) per doc, in order, into
    // doc-chunk-mapping table 0 through a TVarintBufferedWriter keyed by ShardName.
    // Every doc is expected to have Chunk and LocalId set; they are dereferenced unconditionally.
    void TPudgeData::WriteDocChunkMappingTables(NYT::IIOClient& client, const TVector<TDoc>& docs) const {
        using TMappingWriter = NJupiter::TVarintBufferedWriter<NYT::TTableWriter<NJupiter::TDocMappingPart>>;

        const auto tables = NJupiter::TShardsPrepareTables::Clientless();
        auto table = tables.GetDocChunkMappingTable(0);

        auto tableWriter = client.NYT::IIOClient::CreateTableWriter<NJupiter::TDocMappingPart>(
            NJupiter::GetYtPathName(table)
        );
        TMappingWriter mappingWriter(ShardName, &*tableWriter, 0);

        for (const auto& doc : docs) {
            NJupiter::TDocChunkMapping entry;
            entry.SetDocId(doc.DocId);
            entry.SetChunk(*doc.Chunk);
            entry.SetLocalId(*doc.LocalId);
            mappingWriter.WriteEntry(entry);
        }

        // Finish the buffered writer before the underlying table writer.
        mappingWriter.Finish();
        tableWriter->Finish();
    }

    // Writes the given delta chunks, in order, as rows of pudge-delta-chunks table 0.
    void TPudgeData::WriteDeltaChunkTable(NYT::IIOClient& client, const TVector<NJupiter::TPudgeDeltaChunk>& deltaChunks) const {
        const auto tables = NJupiter::TShardsPrepareTables::Clientless();
        auto deltaChunkWriter = client.NYT::IIOClient::CreateTableWriter<NJupiter::TPudgeDeltaChunk>(
            NJupiter::GetYtPathName(tables.GetPudgeDeltaChunksTable(0))
        );

        for (auto it = deltaChunks.begin(); it != deltaChunks.end(); ++it) {
            deltaChunkWriter->AddRow(*it);
        }

        deltaChunkWriter->Finish();
    }

    // TODO Reuse AssignDocIds ~/a/robot/jupiter/tools/selectionrank/lib/assign_docids.cpp
    //
    // Assigns (Chunk, LocalId) to docs and records the resulting changes.
    //
    // One TPudgeDeltaChunk is created per entry of pudgeDeltaChunkIndices. Docs are placed into
    // these delta chunks round-robin; the LocalId is the chunk's NumDocs at the moment of
    // placement, and NumDocs is incremented afterwards.
    //   - A doc without PrevDocId is always placed and logged as NewDocChangeType.
    //   - A doc with PrevDocId is re-placed only if its current chunk is one of the delta chunks;
    //     it is then logged as MovedDocChangeType together with its former chunk and local id.
    //     Otherwise it is left untouched.
    // Changes are written to partition-doc-changes table 0. Returns the delta chunks with their
    // final NumDocs.
    TVector<NJupiter::TPudgeDeltaChunk> TPudgeData::AssignDocIds(
        NYT::IIOClient& client,
        const TVector<ui32>& pudgeDeltaChunkIndices,
        TVector<TDoc>& docs
    ) {
        using TChangesWriter = NJupiter::TVarintBufferedWriter<NYT::TTableWriter<NJupiter::TDocMappingPart>>;

        const auto tables = NJupiter::TShardsPrepareTables::Clientless();

        auto tableWriter = client.NYT::IIOClient::CreateTableWriter<NJupiter::TDocMappingPart>(
            NJupiter::GetYtPathName(tables.GetPartitionDocChangesTable(0))
        );
        TChangesWriter partitionDocChangesWriter(ShardName, &*tableWriter, 0);

        // Build the delta chunk list and the "is this chunk a delta chunk" lookup in one pass.
        TVector<NJupiter::TPudgeDeltaChunk> deltaChunks;
        TVector<bool> isDeltaChunk(Config.GetChunks());
        for (const auto chunkIdx : pudgeDeltaChunkIndices) {
            auto& deltaChunk = deltaChunks.emplace_back();
            deltaChunk.SetChunk(chunkIdx);
            isDeltaChunk.at(deltaChunk.GetChunk()) = true;
        }

        // Round-robin placement: each call takes the next delta chunk and appends the doc to it.
        ui32 nextDeltaChunkIndex = 0;
        const auto placeIntoDeltaChunk = [&deltaChunks, &nextDeltaChunkIndex](TDoc& doc) {
            NJupiter::TPudgeDeltaChunk& target = deltaChunks.at(nextDeltaChunkIndex);
            nextDeltaChunkIndex = (nextDeltaChunkIndex + 1) % deltaChunks.size();

            const auto localId = target.GetNumDocs();
            doc.Chunk = target.GetChunk();
            doc.LocalId = localId;
            target.SetNumDocs(localId + 1);
        };

        for (auto& doc : docs) {
            if (!doc.PrevDocId) {
                // Doc has no predecessor: place it and log it as new.
                placeIntoDeltaChunk(doc);

                NJupiter::TPartitionDocChange newDocChange;
                newDocChange.SetType(NJupiter::NewDocChangeType);
                newDocChange.SetMappedId(doc.DocId);
                newDocChange.SetDocId(doc.DocId);
                partitionDocChangesWriter.WriteEntry(newDocChange);
                continue;
            }

            Y_VERIFY(doc.Chunk);
            if (!isDeltaChunk.at(*doc.Chunk)) {
                // Its chunk is not being rebuilt: keep the inherited placement.
                continue;
            }

            // Snapshot the old placement before it is overwritten.
            const auto prevDoc = doc;
            placeIntoDeltaChunk(doc);

            NJupiter::TPartitionDocChange movedDocChange;
            movedDocChange.SetType(NJupiter::MovedDocChangeType);
            movedDocChange.SetMappedId(doc.DocId);
            movedDocChange.SetChunk(*prevDoc.Chunk);
            movedDocChange.SetDocId(*prevDoc.LocalId);
            partitionDocChangesWriter.WriteEntry(movedDocChange);
        }

        partitionDocChangesWriter.Finish();
        tableWriter->Finish();

        return deltaChunks;
    }

    // Computes chunk statistics for docs and lets NJupiter::SelectPudgeDeltaChunks pick the
    // delta chunk indices for ShardName, always in incremental mode.
    TVector<ui32> TPudgeData::SelectDeltaChunks(const TVector<TDoc>& docs) const {
        constexpr bool incrementalMode = true;

        TVector<NJupiter::TPudgeChunkStats> chunkStats = FillChunkStats(docs);
        return NJupiter::SelectPudgeDeltaChunks(chunkStats, incrementalMode, ShardName);
    }

    // Creates `count` default-constructed docs whose DocId equals their position in the vector.
    // count must be non-zero (Y_VERIFY).
    TVector<TPudgeData::TDoc> TPudgeData::CreateDocs(size_t count) {
        Y_VERIFY(count);

        TVector<TDoc> docs(count);
        ui32 docId = 0;
        while (docId < docs.size()) {
            docs[docId].DocId = docId;
            ++docId;
        }
        return docs;
    }

}
