#include "wad_merge_host_adapter.h"

#include <robot/jupiter/protos/external_relev_attrs.pb.h>
#include <robot/jupiter/library/tables/shards_prepare.h>

#include <library/cpp/logger/global/global.h>
#include <util/folder/path.h>

namespace NRtDoc {
    namespace {
        // Describes where the data of one merged host id comes from: the index of the source chunk
        // and the id used for that host inside the chunk.
        // Both fields default to Max<ui32>(); BuildCategMap treats ChunkNo == Max<ui32>() as "no source recorded".
        struct THostSourceDoc {
            ui32 ChunkNo{Max<ui32>()};
            ui32 HostId{Max<ui32>()};
        };

        // Returns the "<dir>/index" prefix for the directory that contains wadPath.
        // Throws (Y_ENSURE) when wadPath has no directory component, or when neither
        // "<prefix>aa" nor "<prefix>aa.wad" exists on disk.
        static TString GetIndexAaChecked(const TString& wadPath) {
            const TFsPath wadFile(wadPath);
            const TFsPath parentDir = wadFile.Dirname();
            Y_ENSURE(parentDir != wadFile, "path must include directory");

            const TString prefix = parentDir / "index";
            if (!NFs::Exists(prefix + "aa")) {
                // The plain file is absent, so the ".wad" flavour has to be there.
                Y_ENSURE(NFs::Exists(prefix + "aa.wad"), "indexaa file must be present at " << prefix);
            }

            return prefix;
        }
    }

    // Wraps the given merger and starts with a copy of its meta.
    TWadHostAdapter::TWadHostAdapter(IWadMerger::TPtr slave)
        : Slave(slave)
    {
        auto& ownMeta = *MutableMeta();
        ownMeta = Slave->GetMeta();
    }

    // Nothing to release by hand; defined out of line in this translation unit.
    TWadHostAdapter::~TWadHostAdapter() = default;

    void TWadHostAdapter::Init(const TString& fileName) {
        const TString indexaaPrefix = GetIndexAaChecked(fileName);
        DestCategByDocId = MakeIntrusive<TCategMap>(indexaaPrefix);
        Slave->Init(fileName);
    }

    // Fills TChunk::CategMap for every source chunk: a map from the id a host has inside the chunk
    // to the host id in the merged (destination) index.
    //
    // Phase 1 walks all chunks in order and records, per destination host id, the last chunk (and the id
    // inside that chunk) that refers to it. Phase 2 writes the destination host id into the CategMap of
    // that winning chunk only.
    //
    // Requires Init() to have been called (DestCategByDocId must be set) and every chunk to carry a DocIdMap.
    // Each chunk's DocIdMap is destroyed once it has been consumed.
    // Throws (Y_ENSURE) on a missing DocIdMap, a missing indexaa file, or an out-of-range host id.
    void TWadHostAdapter::BuildCategMap() {
        Y_ASSERT(DestCategByDocId);
        const ui32 endTrgHostIdRange = DestCategByDocId->GetValueRangeEnd();

        // reverseMap[newHostId] -> which chunk supplies that host and under which id.
        TVector<THostSourceDoc> reverseMap;
        reverseMap.resize(endTrgHostIdRange);

        for (ui32 chunkNo = 0; chunkNo < Sources.size(); ++chunkNo) {
            TChunk& s = Sources[chunkNo];
            Y_ENSURE(s.DocIdMap.Get() != nullptr);
            TCategMap::TPtr indexaa;
            if (!s.IsDelta) {
                // legacy mode OldHostId -> NewHostId mapping
                const TString indexaaPrefix = GetIndexAaChecked(s.WadPath);
                indexaa = MakeIntrusive<TCategMap>(indexaaPrefix);
            } // else delta mode OldDocId -> NewHostId mapping

            // In portion delta mode the delta chunk is keyed by the final host id itself (see oldHostId below).
            const bool gotFinalDocIds = s.IsDelta && GetMeta().PortionDeltaMode;
            const ui32 endChunkHostIdRange = indexaa ? indexaa->GetValueRangeEnd()
                : gotFinalDocIds ? s.DocIdMap->GetTargetRange().second
                : s.DocIdMap->Size();

            const TDocIdMap::TData& newDocIds = *s.DocIdMap->GetData();
            s.CategMap = MakeHolder<TDocIdMap>();
            s.CategMap->SetZeroMap(endChunkHostIdRange);

            for (ui32 docId = 0; docId < newDocIds.size(); ++docId) {
                const ui32 newDocId = newDocIds[docId];
                if (newDocId == TDocIdMap::DeletedDocument()) {
                    continue;
                }

                const ui32 newHostId = (*DestCategByDocId)[newDocId].GetHostId();

                const ui32 oldHostId = indexaa ? (*indexaa)[docId].GetHostId()
                    : gotFinalDocIds ? newHostId : docId;

                Y_ENSURE(oldHostId != Max<ui32>() && newHostId != Max<ui32>());
                Y_ENSURE(newHostId < reverseMap.size());

                // Herf versioning here is based on the assumption that the data from a newer document
                // should always replace the previous version.

                reverseMap[newHostId] = THostSourceDoc{chunkNo, oldHostId}; // this replaces the existing values if they are set.
            }
            // newDocIds refers into the DocIdMap, so it must not be used past this point.
            s.DocIdMap.Destroy();
        }

        for (ui32 newHostId = 0; newHostId < reverseMap.size(); ++newHostId) {
            const THostSourceDoc& mapping = reverseMap[newHostId];
            const ui32 chunkNo = mapping.ChunkNo;
            if (chunkNo == Max<ui32>()) {
                Y_ASSERT(newHostId == 0 /*normally, only hostid==0 may be missing - though the missing id does not break anything*/);
                continue;
            }

            TDocIdMap::TData& chunkCategs = *Sources[chunkNo].CategMap->MutableData();
            // The source-side id was never validated against the size of the per-chunk map;
            // fail loudly instead of writing past the end of the container.
            Y_ENSURE(mapping.HostId < chunkCategs.size(),
                "source host id " << mapping.HostId << " is out of range [0, " << chunkCategs.size()
                << ") for chunk " << chunkNo);
            chunkCategs[mapping.HostId] = newHostId;
        }
    }

    //
    // This reducer handles only the documents from Delta and drops those that TWadHostAdapter is going to remove anyway.
    // This is needed to circumvent the limits of TOffroadStructDiffWadWriter
    // (where Position() must be <= Max<ui32>, which means size() <= 536870912 - no more than 512 MB of encoded data)
    //
    // Two-pass reducer over NJupiter::THostAttrs rows; the same instance is expected to be run twice.
    //
    // Pass 0 (PassNo == 0) emits nothing. For every row it resolves the final host id of the row's doc id
    // through the destination categ map and remembers, per host id, the doc id of the last row seen.
    // Pass 1 (PassNo == 1) inverts that table (doc id -> host id) and re-reads the rows: a row whose doc id
    // maps to DeletedDocument() is dropped, any other row is emitted with DocId replaced by the host id.
    class TRegHerfDataReducer: public NYT::IReducer<NYT::TTableReader<NYT::Message>, NYT::TTableWriter<NYT::Message>> {
    public:
        // finalDir is the directory that holds the merged indexaa file.
        TRegHerfDataReducer(const TFsPath& finalDir)
            : PassNo(0)
            , IndexAaPrefix(finalDir / "index")
        {
        }

        // Prepares the state of the current pass: loads the categ map and creates an empty host table
        // on pass 0, builds the inverted doc -> host table on pass 1.
        void Start(NYT::TTableWriter<NYT::Message>*) override {
            if (PassNo == 0) {
                // GetIndexAaChecked strips the last path component, so passing "<finalDir>/index" resolves to finalDir.
                const TString indexaaPrefix = GetIndexAaChecked(IndexAaPrefix);
                DestCategByDocId = MakeIntrusive<TCategMap>(indexaaPrefix);
                DocIdByHost = MakeHolder<TDocIdMap>();
            } else {
                Y_ASSERT(PassNo == 1);
                Y_ENSURE(DocIdByHost.Get());
                HostIdByDoc = MakeHostIdByDoc(*DocIdByHost);
            }
        }

        // Consumes all rows of input; see the class comment for what each pass does.
        // Throws (Y_ENSURE) if Start() was not called for the pass or a row carries a deleted doc/host id.
        void Do(NYT::TTableReader<NYT::Message>* input, NYT::TTableWriter<NYT::Message>* output) override {
            Y_ENSURE(DocIdByHost.Get());
            if (PassNo == 0) {
                // NOTE(review): endHostId restarts from 0 and the table is cut to it below, so this presumably
                // relies on Do() being invoked once per pass - confirm against the JoinMap runner.
                ui32 endHostId = 0;
                TDocIdMap::TData& docIdByHost = *DocIdByHost->MutableData();

                for (; input->IsValid(); input->Next()) {
                    NJupiter::THostAttrs value = input->MoveRow<NJupiter::THostAttrs>();
                    const ui32 docId = value.GetDocId(); // finalDocId after merge
                    // Validate the doc id before it is used as an index into the categ map.
                    Y_ENSURE(docId != TDocIdMap::DeletedDocument());
                    const ui32 newHostId = (*DestCategByDocId)[docId].GetHostId(); // real hostId after merge
                    Y_ENSURE(newHostId != TDocIdMap::DeletedDocument());
                    if (endHostId < newHostId + 1) {
                        endHostId = newHostId + 1;
                        if (docIdByHost.size() < endHostId) {
                            Enlarge(docIdByHost, endHostId);
                        }
                    }

                    docIdByHost[newHostId] = docId; //second point of Herf versioning (same logic)
                }
                // Enlarge() over-allocates; trim the table to the host ids actually seen.
                docIdByHost.resize(endHostId);
            } else {
                Y_ENSURE(HostIdByDoc.Get());
                const TDocIdMap& hostIdForDoc = *HostIdByDoc;
                for (; input->IsValid(); input->Next()) {
                    NJupiter::THostAttrs value = input->MoveRow<NJupiter::THostAttrs>();
                    const ui32 docId = value.GetDocId(); // finalDocId after merge
                    Y_ENSURE(docId != TDocIdMap::DeletedDocument());
                    const ui32 hostId = hostIdForDoc.Map(docId);
                    if (hostId != TDocIdMap::DeletedDocument()) {
                        value.SetDocId(hostId); // finalHostId after merge
                        output->AddRow(value, 0);
                    }
                }
            }
        }

        // Ends the current pass: pass 0 only switches the reducer to pass 1, pass 1 finishes the output.
        void Finish(NYT::TTableWriter<NYT::Message>* output) override {
            if (PassNo == 0) {
                PassNo = 1;
            } else {
                output->Finish();
            }
        }

    private:
        // Grows arr to at least `size` elements, adding headroom of min(size/2 + 4, 131072) elements
        // so that repeated growth stays cheap. New elements are filled with DeletedDocument().
        static void Enlarge(TDocIdMap::TData& arr, const ui32 size) {
            if (arr.size() <= size) {
                ui32 newSize = arr.size() + Min<ui32>(arr.size() / 2 + 4, 131072);
                arr.resize(Max(newSize, size), TDocIdMap::DeletedDocument());
            }
        }

        // Inverts the "host id -> doc id" table into a "doc id -> host id" map sized by the
        // target range of usedDocs. Hosts without a doc (DeletedDocument()) are skipped.
        static THolder<TDocIdMap> MakeHostIdByDoc(const TDocIdMap& usedDocs) {
            THolder<TDocIdMap> result = MakeHolder<TDocIdMap>();
            const ui32 endDocId = usedDocs.GetTargetRange().second;
            result->SetZeroMap(endDocId);

            auto& resultMap = *result->MutableData();
            const TDocIdMap::TData& docIdByHost = *usedDocs.GetData();
            for (ui32 hostId = 0; hostId < docIdByHost.size(); ++hostId) {
                ui32 docId = docIdByHost[hostId];
                if (Y_LIKELY(docId != TDocIdMap::DeletedDocument())) {
                    Y_ASSERT(docId < endDocId);
                    resultMap[docId] = hostId;
                }
            }
            return result;
        }

    private:
        ui32 PassNo;                        // 0 while collecting, 1 while emitting
        TString IndexAaPrefix;              // "<finalDir>/index"
        TCategMap::TPtr DestCategByDocId;   // final doc id -> categ (host id), loaded on pass 0
        THolder<TDocIdMap> DocIdByHost;     // final host id -> doc id of the last row seen, filled on pass 0
        THolder<TDocIdMap> HostIdByDoc;     // inverse of DocIdByHost, built at the start of pass 1
    };

    // Builds the command that runs TRegHerfDataReducer (constructed from finalDirWithAa) as a JoinMap
    // over the host attrs table of bucket 0; the same table is used as both input and output.
    NJupiter::TMercuryCmdPtr CreateReduceHostAttrsByDocId(const TFsPath& finalDirWithAa) {
        constexpr ui32 hostAttrsBucket = 0;
        const auto shardTables = NJupiter::TShardsPrepareTables::Clientless();

        return NJupiter::TMercuryCmdBuilder()
            .JoinMap<TRegHerfDataReducer>(finalDirWithAa)
            .Input(shardTables.GetHostAttrsTable(hostAttrsBucket))
            .Output(shardTables.GetHostAttrsTable(hostAttrsBucket))
            .Release();
    }
}

