#include "bundle.h"

#include "arc_merge.h"
#include "delta_subtask.h"
#include "frq_merge.h"
#include "portions_bundle.h"
#include "keyinvwad.h"
#include "wad_merge.h"
#include "custom_mergers.h"
#include "host_mergers.h"
#include "plutonium.h"
#include "shards_prepare.h"

#include "tools.h"
#include "info.h"

#include "fix_panther_meta.h"
#include "panther_dssm_docid.h"
#include "mercury_l2_fix.h"

#include "log.h"

#include <robot/jupiter/tools/shards_prepare/lib/build_attrs.h>
#include <robot/jupiter/tools/shards_prepare/lib/build_counts.h>
#include <robot/jupiter/tools/shards_prepare/lib/build_group_attrs_configs.h>
#include <robot/jupiter/tools/shards_prepare/lib/build_keyinv_and_counts.h>
#include <robot/jupiter/tools/shards_prepare/lib/build_ngram_counts.h>
#include <robot/jupiter/tools/shards_prepare/lib/build_panther.h>
#include <robot/jupiter/tools/shards_prepare/lib/build_panther_dssm_embeddings.h>
#include <robot/jupiter/tools/shards_prepare/lib/prepare_group_attrs.h>

#include <robot/jupiter/protos/external_relev_attrs.pb.h>
#include <robot/jupiter/protos/shards.pb.h>
#include <robot/jupiter/library/rtdoc/interface/mercury.h>
#include <robot/jupiter/library/rtdoc/rt/reader.h>
#include <robot/jupiter/library/rtdoc/file/reader.h>
#include <robot/jupiter/library/rtdoc/rt/writer.h>

#include <robot/jupiter/tools/shards_prepare/lib/prepare_static_features.h>
#include <robot/jupiter/tools/shards_prepare/lib/external_relev_attrs_prepare.h>
#include <robot/jupiter/tools/shards_prepare/lib/host_attrs_prepare.h>
#include <robot/jupiter/tools/shards_prepare/lib/content_attrs_prepare.h>
#include <robot/jupiter/tools/shards_prepare/lib/calculated_attrs_prepare.h>
#include <robot/jupiter/tools/shards_prepare/lib/prepare_sent.h>
#include <robot/jupiter/tools/shards_prepare/lib/build_extinfo_arc.h>
#include <robot/jupiter/tools/shards_prepare/lib/build_panther_dssm_embeddings.h>
#include <robot/jupiter/tools/shards_prepare/lib/build_attrs.h>
#include <robot/jupiter/tools/shards_prepare/lib/hnsw_prepare.h>
#include <robot/jupiter/tools/shards_prepare/lib/build_nav_src_strict.h>
#include <robot/jupiter/tools/shards_prepare/lib/url_heavy_data_prepare.h>

#include <robot/jupiter/tools/shardmerge_utils/lib/document_index/arc_merge.h>
#include <robot/jupiter/tools/shardmerge_utils/lib/document_index/document_index_merge.h>
#include <robot/jupiter/tools/shardmerge_utils/lib/document_index/erf_merge.h>
#include <robot/jupiter/tools/shardmerge_utils/lib/document_index/group_attrs_merge.h>
#include <robot/jupiter/tools/shardmerge_utils/lib/document_index/indexattrs_seg_tree_merge.h>
#include <robot/jupiter/tools/shardmerge_utils/lib/document_index/raw_lumps_index_merge.h>
#include <robot/jupiter/tools/shardmerge_utils/lib/document_index/regerf_merge.h>
#include <robot/jupiter/tools/shardmerge_utils/lib/navsrc/nav_src.h>
#include <robot/jupiter/tools/shardmerge_utils/lib/panther/merge_offroad_keyinv.h>
#include <robot/jupiter/tools/shardmerge_utils/lib/document_index/itditp_slim_index_merge.h>

#include <robot/jupiter/library/opt/inproc_args.h>
#include <robot/jupiter/library/opt/panther.h>
#include <robot/jupiter/library/rtdoc/file/model/builder_input.h>
#include <robot/jupiter/library/rtdoc/file/model/files.h>
#include <robot/jupiter/library/rtdoc/file/prep_merge.h>
#include <robot/jupiter/library/rtdoc/file/portions_output.h>
#include <robot/jupiter/library/rtdoc/file/docidmap_io.h>
#include <robot/jupiter/library/rtdoc/file/yt_client.h>

#include <search/panther/protos/frequent_terms.pb.h>
#include <search/panther/protos/most_frequent_terms_global_countes.pb.h>

#include <library/cpp/protobuf/util/pb_io.h>
#include <library/cpp/logger/global/global.h>

#include <util/generic/lazy_value.h>
#include <util/system/fs.h>
#include <util/string/type.h>

#define LOG C_LOG(this->Log)

namespace NFusion {

    // List of frequent terms, in the order returned by LoadFrequentTerms().
    using TFrequentTerms = TVector<TString>;

    // Map from a term to a vector of counters.
    // NOTE(review): not referenced in the visible part of this file; exact counter semantics to be confirmed.
    using TMostFrequentTermsGlobalCounters = THashMap<TString, TVector<ui64>>;

    // Definition of the static class member holding the rebuild tag file name.
    const TString TExtBuilderTask::RebuildTagFileName = "rebuild_tag";

    namespace {
        // Name of the serialized NPanther::TFrequentTermsProto file; resolved relative to the
        // base path given to LoadFrequentTerms().
        constexpr TStringBuf frequentTermsFileName = "frequent_terms.proto";

        // Name of the most-frequent-terms counters file; resolved relative to the models directory.
        constexpr TStringBuf mostFrequentTermsGlobalCountersFileName = "most_frequent_terms_global_countes";

        // Placeholder tier key used when registering panther index options.
        constexpr TStringBuf fakeMercuryShard = "-";

        // Returns "<output temp dir>/delta<MergerMapSuffix>" for the given builder task.
        TFsPath FormatDeltaMappingPath(const NRtDoc::TBuilderTask& task) {
            const TFsPath tempDir(task.GetOutput().GetTempDir());
            return tempDir / (TString("delta") + NRtDoc::TBuilderFiles::MergerMapSuffix);
        }

        // Builds a builder input whose source directory is the task's delta directory and
        // whose mapping file is the one produced by FormatDeltaMappingPath().
        NRtDoc::TBuilderTask::TBuilderInput GetDeltaInput(const NRtDoc::TBuilderTask& task) {
            NRtDoc::TBuilderTask::TBuilderInput deltaInput;
            deltaInput.SetSrcDir(NRtDoc::IBuilderInputs(task).GetDeltaDir());
            deltaInput.SetSrcMapping(FormatDeltaMappingPath(task));
            return deltaInput;
        }

        // Returns a copy of `args` with ArcMergeOpts.DocsCount raised to Max<ui32>().
        NJupiter::TInProcArgs PatchArcMergeOpts(NJupiter::TInProcArgs args) {
            auto& arcMergeOpts = args.ArcMergeOpts;
            arcMergeOpts.DocsCount = Max<ui32>();
            return args;
        }
    }

    // Reads the frequent terms list from "<basePath>/frequent_terms.proto".
    //
    // Returns an empty list when basePath is not defined (manual runs and unit tests).
    // Fails via Y_ENSURE when the file does not exist or does not parse as
    // NPanther::TFrequentTermsProto.
    TVector<TString> LoadFrequentTerms(const TFsPath& basePath) {
        TVector<TString> result;
        if (!basePath.IsDefined()) {
            // nothing to load (used in manual runs and unit tests)
            return result;
        }

        const TFsPath protoPath = basePath / frequentTermsFileName;
        Y_ENSURE(NFs::Exists(protoPath), "LoadFrequentTerms: file not found: " << protoPath);

        TFileInput input(protoPath);
        NPanther::TFrequentTermsProto parsed;
        Y_ENSURE(parsed.ParseFromArcadiaStream(&input), "LoadFrequentTerms: incorrect protobuf in " << protoPath);

        result.reserve(parsed.TermsSize());
        for (const auto& term : parsed.GetTerms()) {
            result.push_back(term);
        }
        return result;
    }

    // Wraps LoadFrequentTerms() into a lazy value. The path is copied into the initializer,
    // so the returned object does not refer to `basePath` afterwards.
    TLazyValue<TFrequentTerms> LoadFrequentTermsLazy(const TFsPath& basePath) {
        return TLazyValue<TFrequentTerms>([termsDir = basePath]() {
            return LoadFrequentTerms(termsDir);
        });
    }

    // Returns `outputPrefix` with the ".wad" extension appended.
    TString ConstructWadFileName(const TString& outputPrefix) {
        return outputPrefix + ".wad";
    }

    // Base implementation of the index file naming hook: delegates to ConstructWadFileName(),
    // i.e. returns "<outputPrefix>.wad".
    TString ICmdExtensions::ConstructFileName(const TString& outputPrefix) const {
        return ConstructWadFileName(outputPrefix);
    }

    // Resolves the index file name for `outputPrefix`.
    // A non-null `extension` decides the name itself; without an extension the plain
    // "<outputPrefix>.wad" name is used.
    TString ConstructIndexFileName(ICmdExtensions* extension, const TString& outputPrefix) {
        if (extension == nullptr) {
            return ConstructWadFileName(outputPrefix);
        }
        return extension->ConstructFileName(outputPrefix);
    }

    // Rewrites the prepared lumps table `inputLumpsFile` into `outputLumpsFile`.
    //
    // For every non-deleted document of the input table:
    //   1. the stored NJupiter::TMercuryLumps message is parsed;
    //   2. a synthetic "shards_stats" lump (DocCount = 1, DocSize = 0, Shard = "-") is appended
    //      to the parsed source message;
    //   3. the whitelisted source lumps are copied to the destination message;
    //   4. `filler.Fill(src, dst)` is invoked (presumably adds the computed lumps to dst -
    //      the filler is opaque here, verify against TShardsPrepareLumpsFiller);
    //   5. the destination message is serialized and written under the document's FinalDocId.
    //
    // @param filler                 processor invoked once per document with (src, dst)
    // @param inputLumpsFile         path of the table to read
    // @param outputLumpsFile        path of the table to write
    // @param heavyDataLumpsEnabled  when true, the "url_heavy_data" lump is copied through as well
    //
    // Throws (Y_ENSURE) when a stored document does not parse as TMercuryLumps.
    void RunMercuryShardPrepareProcessors(TShardsPrepareLumpsFiller& filler, const TString& inputLumpsFile, const TString& outputLumpsFile,
                                          bool heavyDataLumpsEnabled = false) {
        NRtDoc::TPrepTable reader(inputLumpsFile);
        reader.Init();
        auto it = reader.Open();

        // The shared whitelist is immutable. It used to be a mutable function-local static that
        // got "url_heavy_data" inserted on demand, which leaked the flag into every later call
        // (including calls with heavyDataLumpsEnabled == false) and raced with concurrent readers.
        static const THashSet<TString> baseLumpsWhiteList{
            "calculated_attrs",
            "calculated_attrs_with_keyinv_doc_chunk_mapping_single_chunk",
            "content_attrs",
            "external_relev_attrs",
            "host_attrs",
            "host_mapping",
            "host_domain_doc_mapping",
            "walrus",
            "walrus_with_keyinv_doc_chunk_mapping_single_chunk",
            "mobile_url",
        };

        // Per-call decision: the heavy data lump is accepted only when requested by this caller.
        const auto isWhiteListed = [heavyDataLumpsEnabled](const TString& lumpName) {
            if (baseLumpsWhiteList.contains(lumpName)) {
                return true;
            }
            return heavyDataLumpsEnabled && lumpName == "url_heavy_data";
        };

        const TString fakeShardStats = []() {
            NJupiter::TUrlToShardStats msg;
            msg.SetDocCount(1);
            msg.SetDocSize(0);
            msg.SetShard("-");
            return msg.SerializeAsString();
        }();

        NRtDoc::TPrepWriter writer(outputLumpsFile);
        while (it->Next()) {
            const auto& doc = it->GetData();
            // Deleted documents are not expected here; release builds just skip them.
            Y_ASSERT(doc.TmpDocId != NRtDoc::TDocIdMap::DeletedDocument());
            if (doc.TmpDocId == NRtDoc::TDocIdMap::DeletedDocument()) {
                continue;
            }

            const auto& buf = doc.Data;
            NJupiter::TMercuryLumps src;
            Y_ENSURE(src.ParseFromArray(buf.data(), buf.size()));

            {
                // Visible to the filler through `src`; not whitelisted, so not copied to `dst`.
                auto* lump = src.MutableLumps()->Add();
                lump->SetName("shards_stats");
                lump->SetData(fakeShardStats);
            }

            NJupiter::TMercuryLumps dst;

            for (const auto& srcLump : src.GetLumps()) {
                if (isWhiteListed(srcLump.GetName())) {
                    *dst.MutableLumps()->Add() = srcLump;
                }
            }

            filler.Fill(src, dst);

            TString serialized = dst.SerializeAsString();
            const auto data = TBlob::FromStringSingleThreaded(serialized);
            writer.AddData(doc.FinalDocId, data);
        }

        writer.Finish();
    }

    // Builds the set of preparate filters, prep commands and merge commands for the given
    // bundle version.
    //
    // @param bundleVer  bundle version; when bundleVer.Disabled is set an empty bundle is returned,
    //                   otherwise bundleVer.GetGlobalNo() gates the version-dependent commands
    // @param modelsDir  directory used to lazily load the frequent terms list; when empty,
    //                   the Build*KeyInvAndCounts prep commands are not registered
    // @param targetDir  unused
    // @return the filled bundle (never null)
    //
    // The order of the push_back calls below defines the order of the commands inside each list.
    THolder<TExtBuilderBundle> TExtBuilderTask::GetBundle(const TBundleVersion& bundleVer, const TString& modelsDir, const TString& /*targetDir*/) {
        THolder<TExtBuilderBundle> bundle = MakeHolder<TExtBuilderBundle>();
        if (bundleVer.Disabled) {
            return bundle;
        }

        using NBundleMeta::TDeploy;
        using NBundleMeta::TMode;

        // Null extension pointer for the commands that use the default index file naming.
        const ICmdExtensions::TPtr noExtensions;

        const ui32 ver = bundleVer.GetGlobalNo();
        // Alias is currently unused; Y_UNUSED silences the warning.
        const auto& b = bundleVer;
        Y_UNUSED(b);

        const bool hasModelsDir = !modelsDir.empty();
        // Lazy: the file is read on the first dereference of frequentTerms, not here.
        auto frequentTerms = LoadFrequentTermsLazy(modelsDir);

        {
            // Names of the preparates to be saved, grouped by the command that needs them.
            bundle->SavedPreparates.Enabled = true;
            // PrepareGroupAttrsAndMappings
            bundle->SavedPreparates.Filter.push_back("calculated_attrs");
            bundle->SavedPreparates.Filter.push_back("content_attrs");
            if (HeavyDataLumpsEnabled) {
                bundle->SavedPreparates.Filter.push_back("url_heavy_data");
            }
            // ReduceGroupAttrsConfigs
            bundle->SavedPreparates.Filter.push_back("group_attrs_configs");
            // MergeInvHash
            bundle->SavedPreparates.Filter.push_back("inv_url_hash");
            // BuildPantherDssmEmbeddings
            bundle->SavedPreparates.Filter.push_back("panther_dssm_url_title_embeddings");
            bundle->SavedPreparates.Filter.push_back("erf_herf_features");
            // rebuild L1
            bundle->SavedPreparates.Filter.push_back("l1_doc_lumps");
            bundle->SavedPreparates.Filter.push_back("l1_global_lumps");
            // rebuild L2
            bundle->SavedPreparates.Filter.push_back("calculated_attrs_l2_doc_lumps");
            bundle->SavedPreparates.Filter.push_back("calculated_attrs_l2_global_lumps");
            bundle->SavedPreparates.Filter.push_back("content_attrs_l2_doc_lumps");
            bundle->SavedPreparates.Filter.push_back("content_attrs_l2_global_lumps");
            if (HeavyDataLumpsEnabled) {
                bundle->SavedPreparates.Filter.push_back("url_heavy_data_l2_doc_lumps");
                bundle->SavedPreparates.Filter.push_back("url_heavy_data_l2_global_lumps");
            }
            bundle->SavedPreparates.Filter.push_back("external_relev_attrs_l2_doc_lumps");
            bundle->SavedPreparates.Filter.push_back("external_relev_attrs_l2_global_lumps");
            // misc
            bundle->SavedPreparates.Filter.push_back("nav_src_strict");

            // Since version 199 the remaining lumps whitelisted by RunMercuryShardPrepareProcessors are saved too.
            if (ver >= 199) {
                bundle->SavedPreparates.Filter.push_back("calculated_attrs_with_keyinv_doc_chunk_mapping_single_chunk");
                bundle->SavedPreparates.Filter.push_back("external_relev_attrs");
                bundle->SavedPreparates.Filter.push_back("host_attrs");
                bundle->SavedPreparates.Filter.push_back("host_mapping");
                bundle->SavedPreparates.Filter.push_back("host_domain_doc_mapping");
                bundle->SavedPreparates.Filter.push_back("walrus");
                bundle->SavedPreparates.Filter.push_back("walrus_with_keyinv_doc_chunk_mapping_single_chunk");
                bundle->SavedPreparates.Filter.push_back("mobile_url");
            }
        }

        { // PriorCommands (groupattrs)
            bundle->PriorPrepCommands.push_back(TMercuryPrepCmd{"ReduceGroupAttrsConfigs", NJupiter::CreateReduceGroupAttrsConfigs()});
            bundle->PriorPrepCommands.push_back(TMercuryPrepCmd{"PrepareGroupAttrsAndMappings", NJupiter::CreatePrepareGroupAttrsAndMappings()});

            // A single command runs two mergers: group attrs and host domain category names.
            bundle->PriorCommands.push_back(TJupiterCmd{"MergeGroupAttrs", "indexaa", TMode::Rebuild, TDeploy::Gradual, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::MergeGroupAttrsLegacy(NJupiter::TInProcArgs(smOpts, prep));
                        NJupiter::MergeHostDomainCategoryToName(NJupiter::TInProcArgs(smOpts, prep));
                    }});
        }

        { // please keep this ident to protect Svn Blame
            bundle->Commands.push_back(TJupiterCmd{"MergeErf", "indexerf2", TMode::WadMerge, TDeploy::PanicIfMissing, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::ErfMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            bundle->Commands.push_back(TJupiterCmd{"MergeAnnData", "indexann.data", TMode::WadMerge, TDeploy::Gradual, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::AnnDataMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            bundle->Commands.push_back(TJupiterCmd{"MergeLinkAnnData", "indexlinkann.data", TMode::WadMerge, TDeploy::Gradual, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::LinkAnnDataMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            if (Y_LIKELY(hasModelsDir)) {
                // 'counts' preparates are dependent on frequentTerms
                // (dereferencing frequentTerms here triggers the actual load)
                bundle->PrepCommands.push_back(TMercuryPrepCmd{"BuildTextKeyInvAndCounts", NJupiter::CreateBuildTextKeyInvAndCounts(*frequentTerms)});
                bundle->PrepCommands.push_back(TMercuryPrepCmd{"BuildAnnKeyInvCounts", NJupiter::CreateBuildAnnKeyInvAndCounts(*frequentTerms)});
                bundle->PrepCommands.push_back(TMercuryPrepCmd{"BuildLinkKeyInvCounts", NJupiter::CreateBuildLinkKeyInvAndCounts(*frequentTerms)});
                bundle->PrepCommands.push_back(TMercuryPrepCmd{"BuildFactorAnnKeyInvCounts", NJupiter::CreateBuildFactorAnnKeyInvAndCounts(*frequentTerms)});
            }

            // The lambda captures a copy of the lazy frequentTerms value ([=]).
            bundle->Commands.push_back(TJupiterCmd{"MergeTextDocumentIndex", "indexkeyinv", TMode::WadCustomMerge, TDeploy::PanicIfMissing, new NFusion::TKeyInvWad(), {},
                    [=](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::DocumentIndexTextMerge(NJupiter::TInProcArgs(smOpts, prep), *frequentTerms);
                    }});

            bundle->SpecialCommands.insert("ReduceHostAttrsByDocId");
            bundle->Commands.push_back(TJupiterCmd{"MergeRegHerf", "indexregherf", TMode::WadCustomMerge, TDeploy::PanicIfMissing, new NFusion::TRegHostMerger(), {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::MergeRegHostErf(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            bundle->Commands.push_back(TJupiterCmd{"MergeRegErf", "indexregerf", TMode::WadCustomMerge, TDeploy::PanicIfMissing, new NFusion::TRegErfMerger(), {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::MergeRegErf(NJupiter::TInProcArgs(smOpts, prep));
                    }});
            bundle->Commands.push_back(TJupiterCmd{"MergeHerf", "indexherf", TMode::WadCustomMerge, TDeploy::PanicIfMissing, new NFusion::THostMerger(), {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::HerfMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            bundle->Commands.push_back(TJupiterCmd{"MergeAnnDocumentIndex", "indexann", TMode::WadCustomMerge, TDeploy::PanicIfMissing, new NFusion::TKeyInvWad(), {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::DocumentIndexAnnMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            bundle->Commands.push_back(TJupiterCmd{"MergeLinkAnnDocumentIndex", "indexlinkann", TMode::WadCustomMerge, TDeploy::PanicIfMissing, new NFusion::TKeyInvWad(), {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::DocumentIndexLinkAnnMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});
            bundle->Commands.push_back(TJupiterCmd{"MergeLinkAnnSentenceLengths", "indexlinkann.sent", TMode::WadCustomMerge, TDeploy::PanicIfMissing, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::LinkAnnSentenceLengthsMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            bundle->Commands.push_back(TJupiterCmd{"MergeKeyInvSentenceLengths", "indexsent", TMode::WadCustomMerge, TDeploy::PanicIfMissing, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::KeyInvSentenceLengthsMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            // NOTE(review): the two conditions below overlap at ver == 173, where both
            // MergeTextArc and MergeTextArcChunked are registered for the "index" prefix - confirm intended.
            if (ver < 174) {
                bundle->Commands.push_back(TJupiterCmd{"MergeTextArc", "index", TMode::WadCustomMerge, TDeploy::PanicIfMissing, MakeIntrusive<TArcExtension>(), {},
                        [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                            NJupiter::ArcMergeDeprecated(PatchArcMergeOpts(NJupiter::TInProcArgs(smOpts, prep)));
                        }});
            }

            if (ver >= 173) {
                bundle->Commands.push_back(TJupiterCmd{"MergeTextArcChunked", "index", TMode::Rebuild, TDeploy::PanicIfMissing, MakeIntrusive<TChunkedArcWadExtension>(), {},
                        [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                            // Work on a copy of the options so that the caller's Shard is left untouched.
                            auto opts = smOpts;
                            opts.Shard = TPudgeData::ShardName;
                            NJupiter::ArcMergeSaas(PatchArcMergeOpts(NJupiter::TInProcArgs(opts, prep)));
                        }});
            }

            if (ver < 182) {
                auto extArc = MakeIntrusive<TArcWadExtension>("indexarc", NDoom::TWadLumpId(NDoom::EWadIndexType::ExtInfoArcIndexType, NDoom::EWadLumpRole::Struct));
                bundle->Commands.push_back(TJupiterCmd{"MergeExtInfoArc", "index.extinfo", TMode::WadMerge, TDeploy::PanicIfMissing, std::move(extArc), {},
                        [ver](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                            auto args = PatchArcMergeOpts(NJupiter::TInProcArgs(smOpts, prep));
                            // Bert embeddings are enabled for versions 180 and 181 only (this command is registered for ver < 182).
                            args.ArcMergeOpts.EnableBertEmbeddings = ver >= 180;
                            NJupiter::ExtInfoArcMerge(args);
                        }});
            }

            bundle->Commands.push_back(TJupiterCmd{"MergeFactorAnnDocumentIndex", "indexfactorann", TMode::WadCustomMerge, TDeploy::Gradual, new NFusion::TKeyInvWad(), {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::DocumentIndexFactorAnnMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});
            bundle->Commands.push_back(TJupiterCmd{"MergeFactorAnnData", "indexfactorann.data", TMode::WadMerge, TDeploy::Gradual, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::FactorAnnDataMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});
            bundle->Commands.push_back(TJupiterCmd{"MergeFactorAnnSentenceLengths", "indexfactorann.sent", TMode::WadCustomMerge, TDeploy::Gradual, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::FactorAnnSentenceLengthsMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            auto ext = MakeIntrusive<TArcWadExtension>("indexlinkann.arc", NDoom::TWadLumpId(NDoom::EWadIndexType::LinkAnnArcIndexType, NDoom::EWadLumpRole::Struct));
            bundle->Commands.push_back(TJupiterCmd{"MergeLinkAnnArc", "indexlinkann.", TMode::WadMerge, TDeploy::PanicIfMissing, std::move(ext), {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::LinkAnnArcMerge(PatchArcMergeOpts(NJupiter::TInProcArgs(smOpts, prep)));
                    }});

            bundle->Commands.push_back(TJupiterCmd{"MergeInvHash", "indexinvhash", TMode::Rebuild, TDeploy::PanicIfMissing, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::InvHashMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            bundle->Commands.push_back(TJupiterCmd{"MergeAnnSentenceLengths", "indexann.sent", TMode::WadMerge, TDeploy::Gradual, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::AnnSentenceLengthsMerge(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            if (ver >= 185) {
                bundle->Commands.push_back(TJupiterCmd{"MergeItdItpSlimIndex", "indexitditpslim", TMode::WadMerge, TDeploy::Gradual, noExtensions, {},
                        [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                            NJupiter::MergeItdItpSlimIndex(NJupiter::TInProcArgs(smOpts, prep));
                        }});
            }

            bundle->Commands.push_back(TJupiterCmd{"MergeFrq", "indexfrq", TMode::Rebuild, TDeploy::PanicIfMissing, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        MergeFrq(NJupiter::TInProcArgs(smOpts, prep));
                    }});

            // MergeAttrs is registered with a null hit remapper here; MergePortions() runs
            // "MergeAttrsSaas" with a segment hit remapper when that special command is set.
            bundle->PrepCommands.push_back(TMercuryPrepCmd{"MergeAttrs", NJupiter::CreateMergeAttrs(NJupiter::TPortionHitRemapperPtr())});;
            bundle->SpecialCommands.insert("MergeAttrsSaas");

            // From version 165 MergeL1 goes to LateCommands in WadMerge mode;
            // older versions keep it in Commands in Rebuild mode.
            const bool withPantherDssm = ver >= 165;
            auto& mergeL1InjectionPoint = withPantherDssm ? bundle->LateCommands : bundle->Commands;
            mergeL1InjectionPoint.push_back(TJupiterCmd{"MergeL1", "index.l1", withPantherDssm ? TMode::WadMerge : TMode::Rebuild, TDeploy::PanicIfMissing, noExtensions, {},
                    [=](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::L1Merge(NJupiter::TInProcArgs(smOpts, prep), withPantherDssm);
                    }});

            // NOTE(review): the lambda captures `this` and reads HeavyDataLumpsEnabled when it runs,
            // so the stored command presumably must not outlive this task - confirm.
            bundle->Commands.push_back(TJupiterCmd{"MergeL2", "index.l2", TMode::Rebuild, TDeploy::PanicIfMissing, noExtensions, {},
                    [this](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        if (HeavyDataLumpsEnabled) {
                            NJupiter::L2Merge(NJupiter::TInProcArgs(smOpts, prep));
                        } else {
                            NJupiter::L2MergeOld(NJupiter::TInProcArgs(smOpts, prep));
                        }
                    }});
            //TODO(REFRESH-319): use TDeploy::RebuildOrMerge for l1wad and l2wad
            //TODO(REFRESH-382): to save disk space, change to TMode::WadMerge (will need a unit test)

            // The command name below is spelled "MergeAttributres"; it is a runtime string and is kept as is.
            bundle->Commands.push_back(TJupiterCmd{"MergeAttributres", "indexattrs.", TMode::Rebuild, TDeploy::Gradual, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::MergeAttributes(NJupiter::TInProcArgs(smOpts, prep));
                    }});
            bundle->Commands.push_back(TJupiterCmd{"MergeIndexAttrsSegTree", "indexattrs.", TMode::Rebuild, TDeploy::Gradual, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::MergeIndexAttrsSegTree(NJupiter::TInProcArgs(smOpts, prep));
                    }});
            // TODO(JUPITER-1047): MergeIndexAttrsSegTree output is not controlled by indexdump.py. Fix it.

            bundle->PrepCommands.push_back(TMercuryPrepCmd{"MergeCounts", NJupiter::CreateMergeCounts()}); // keyinv_counts(n portions) -> counts_keyinv (1 portion)
            bundle->SpecialCommands.insert("RebuildCountsSaas");
            bundle->SpecialCommands.insert("BuildPanther");
            bundle->Commands.push_back(TJupiterCmd{"MergePanther", "indexpanther", TMode::Rebuild, TDeploy::Gradual, new NFusion::TBuildFromMergedPortions(), {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::MergePanther(NJupiter::TInProcArgs(smOpts, prep));
                    }});
            bundle->SpecialCommands.insert("TempFixPanther");
        }

        if (ver >= 160) {
            // FRESHNESS-3912 (under pron)
            bundle->SpecialCommands.insert("BuildPantherDssmEmbeddings");
        }

        if (ver >= 170) {
            bundle->Commands.push_back(TJupiterCmd{"MergeNavSrcStrict", "indexnavutf", TMode::Rebuild, TDeploy::Gradual, noExtensions, {},
                    [](NJupiter::TShardMergerOpts& smOpts, NYT::IIOClient& prep) {
                        NJupiter::MergeNavSrcStrict(NJupiter::TInProcArgs(smOpts, prep));
                    }});
        }

        return bundle;
    }

    // File-wide mutex taken by TExtBuilderTask::MergePortions() around the
    // BuildPantherDssmEmbeddings step for bundle versions below 171 (memory consumption trick, REFRESH-378).
    static TMutex gTensorFlowMutex;

    // Builds the options for the unigram/bigram panther build: TopKishkaLength is 32 and a single
    // index info entry is registered under the fake shard key "-".
    inline NJupiter::TBuildPantherOpts FillPantherOpts() {
        NDoom::TPantherIndexInfo indexInfo;
        indexInfo.SetMinUnigramRelevance(0); // All documents have high relevance
        indexInfo.SetMinBigramRelevance(0); // All documents have high relevance
        indexInfo.SetRelevanceMultiplier(100);
        indexInfo.ClearWordMaskWeight();
        indexInfo.SetMaxKishkaLength(Max<ui32>());
        indexInfo.SetPantherVersion("0.1");
        indexInfo.SetSuperLemmerVersion("v1");

        NJupiter::TBuildPantherOpts result;
        result.TopKishkaLength = 32;
        result.PantherIndexInfoByTier.insert(std::make_pair(TString(fakeMercuryShard), indexInfo));
        return result;
    }

    // options are copied from https://a.yandex-team.ru/arc/trunk/arcadia/robot/jupiter/cmpy/shards_prepare/__init__.py?rev=4736685#L732
    //
    // Builds the ngram flavour of the panther options: unlimited TopKishkaLength and a single
    // index info entry registered under the fake shard key "-".
    NJupiter::TBuildPantherOpts FillNgramOpts() {
        NDoom::TPantherIndexInfo indexInfo;
        indexInfo.SetMinUnigramRelevance(0);
        indexInfo.SetMinBigramRelevance(0);
        indexInfo.SetRelevanceMultiplier(50);
        indexInfo.SetWordMaskWeight(1.0);
        indexInfo.SetMaxKishkaLength(1000);
        indexInfo.SetPantherVersion("0.1");
        indexInfo.SetSuperLemmerVersion("v1");

        NJupiter::TBuildPantherOpts result;
        result.TopKishkaLength = Max<ui32>();
        result.PantherIndexInfoByTier.insert(std::make_pair(TString(fakeMercuryShard), indexInfo));
        return result;
    }

    void TExtBuilderTask::MergePortions(const TMercuryCmds& /*allCommands*/, const TSpecialCmds& specCommands) {
        //TODO(REFRESH-383): remove the copypaste added in https://a.yandex-team.ru/arc/commit/4745942 (Why we create remapper every time?..)
        //TODO(REFRESH-383): make the code here more readable - use NRtDoc::TBuilderStage, avoid specCommands
        if (specCommands.count("RebuildCountsSaas")) {
            auto remapper = MakeIntrusive<NRtDoc::TSegmentHitRemapper>();
            TMercuryPrepCmd prepCmd{"RebuildCountsSaas", NJupiter::CreateRebuildCountsSaas(remapper->GetContext())};
            MergePortion(prepCmd, remapper);
        }
        if (specCommands.contains("MergeAttrsSaas")) {
            auto remapper = MakeIntrusive<NRtDoc::TSegmentHitRemapper>();
            TMercuryPrepCmd prepCmd{"MergeAttrsSaas", NJupiter::CreateMergeAttrs(remapper->GetContext())};
            MergePortion(prepCmd, remapper);
        }

        auto frequentTerms = LoadFrequentTermsLazy(Task.GetModelsDir());

        if (specCommands.count("BuildPanther")) {
            const bool isUnigramBigramPanther = true;

            NJupiter::TBuildPantherOpts pantherOptsObj = FillPantherOpts();
            TMercuryPrepCmd prepCmd;
            if (BundleVersion->GetGlobalNo() >= 190) {
                auto mostFrequentTermsPath = TFsPath{Task.GetModelsDir()} / mostFrequentTermsGlobalCountersFileName;
                prepCmd = {"BuildPanther", NJupiter::CreateBuildPanther(pantherOptsObj, isUnigramBigramPanther, *frequentTerms, mostFrequentTermsPath, /*shard=*/0)};
            } else {
                prepCmd = {"BuildPanther", NJupiter::CreateBuildPanther(pantherOptsObj, isUnigramBigramPanther, *frequentTerms, /*shard=*/0)};
            }
            ReduceMergedPortion(prepCmd);

        }

        if (specCommands.count("BuildPantherDssmEmbeddings") || specCommands.count("RebuildPantherDssmEmbeddings")) {
            const bool rebuild = specCommands.count("RebuildPantherDssmEmbeddings");

            TMercuryCmds prepCmds;
            TFsPath modelsPath(Task.GetModelsDir());
            Y_ENSURE(modelsPath.IsDefined());
            modelsPath.CheckExists();
            Y_ENSURE(modelsPath.IsDirectory());

            if (!rebuild) {
                NRtDoc::TBuilderTask::TBuilderInput delta = GetDeltaInput(Task);
                TMercuryPrepCmd filterDssmByDoc = TMercuryPrepCmd{"FilterPantherDssmTermsByDocId", CreateFilterPantherDssmByDoc(delta.GetSrcMapping())};
                TPortionsBundle(InitPreparePortions(true), Log, TimerLog).RewritePortion(filterDssmByDoc);
            }

            // Simple trick to reduce the memory consumption - REFRESH-378
            TMaybe<TGuard<TMutex>> g;
            if (BundleVersion->GetGlobalNo() < 171) {
                g.ConstructInPlace(gTensorFlowMutex);
            }

            const TString modelsPathPrefix = (TString)modelsPath.Fix() + "/";
            prepCmds.emplace_back(TMercuryPrepCmd{"BuildPantherDssmEmbeddings",
                NJupiter::CreateBuildPantherDssmEmbeddings({
                    .ModelsPathPrefix = modelsPathPrefix,
                    .UseJobStats = false,
                    .UseItdItpModel = false,
                    .UseTFModel = BundleVersion->GetGlobalNo() < 171,
                    .Encoder = {*frequentTerms},
                })});
            PreparePortions(prepCmds, {}, /*isDelta=*/!rebuild);
        }

        if (specCommands.count("MercuryL2Fix")) {
            FixMercuryL2(*InitPreparePortions(true), HeavyDataLumpsEnabled);
        }
    }

    // Creates a local builder client for `task` and wires it up: stop signal, the plutonium
    // doc id preprocessor, portions input (read from the task's delta dir), tmpfs dir and
    // the doc id setters of TPortionsBundle.
    NRtDoc::TBuilderLocalClientPtr TExtBuilderTask::CreateLocalClient(const NRtDoc::IBuilderInputs& task, bool delta) {
        auto client = MakeIntrusive<NRtDoc::TBuilderLocalClient>(task, delta);

        client->SetStopSignal(Stop);
        client->RegisterPreprocessor(new TPlutoniumDocId());
        client->SetPortionsInput(Portions, task.GetDeltaDir());
        client->SetTmpfsDir(task.GetTmpfsDir());

        TPortionsBundle::RegisterDocIdSetters(*client);
        return client;
    }

    // Same as CreateLocalClient(), plus a portions output that writes into the task's delta dir.
    // Creates the shared "portions." config on first use.
    NRtDoc::TBuilderLocalClientPtr TExtBuilderTask::CreatePortionsClient(const NRtDoc::IBuilderInputs& task, bool delta) {
        if (!Portions) {
            Portions = MakeIntrusive<NRtDoc::TBuilderPortionsConfig>("portions.");
        }

        const NRtDoc::TBuilderLocalClientPtr client = CreateLocalClient(task, delta);
        const TFsPath portionsDir(task.GetDeltaDir());

        // The output is set through the ILocalClient interface.
        NRtDoc::ILocalClient& portionsWriter = *client;
        portionsWriter.SetPortionsOutput(MakeIntrusive<NRtDoc::TPortionsOutput>(portionsDir, Portions));

        return client;
    }

    // Creates a client that reads the merged "prep.portions.*" data from `finalDir` and writes
    // regular portions into the delta dir of `task`. Only the delta dir and the tmpfs dir of `task` are used.
    NRtDoc::TBuilderLocalClientPtr TExtBuilderTask::CreateReducePortionsClient(const NRtDoc::IBuilderInputs& task, const TString& finalDir) {
        if (!Portions) {
            Portions = MakeIntrusive<NRtDoc::TBuilderPortionsConfig>("portions.");
        }

        const TFsPath deltaDir = task.GetDeltaDir();

        // A synthetic task: the single input is finalDir (OutputDir/prep.portions.tbl_name), the output goes to DeltaDir.
        // This is a "step back" from the nextgen.
        NRtDoc::TBuilderTask reduceTask;

        auto& mergedInput = *reduceTask.AddInputs();
        mergedInput.SetSrcDir(finalDir);
        // SrcMapping is intentionally not set: there should be no mapping in the finalDir
        mergedInput.SetIsFinalIndex(true);

        auto& reduceOutput = *reduceTask.MutableOutput();
        reduceOutput.SetTempDir(deltaDir);
        reduceOutput.SetTrgDir(finalDir);
        reduceOutput.SetTmpfsDir(task.GetTmpfsDir());

        auto client = MakeIntrusive<NRtDoc::TBuilderLocalClient>(reduceTask, /*delta=*/false);
        client->SetStopSignal(Stop);

        // The input side uses a clone of the portions config with the "prep.portions." prefix
        auto mergedPortionsConf = Portions->Clone("prep.portions.");
        TPortionsBundle::RegisterPortionIds(*mergedPortionsConf);

        client->SetPortionsInput(mergedPortionsConf, finalDir);
        client->SetPortionsOutput(MakeIntrusive<NRtDoc::TPortionsOutput>(deltaDir, Portions));
        TPortionsBundle::RegisterDocIdSetters(*client);
        return client;
    }

    // Decides whether the merge runs in rebuild mode.
    // The mode is on when the REBUILD_ON_MERGE environment variable is true, or when RebuildTag is
    // non-empty and at least one final-index input carries a different tag in its directory.
    void TExtBuilderTask::InitRebuildMode() {
        IsRebuildMode = IsTrue(GetEnv("REBUILD_ON_MERGE"));

        for (const auto& input : Task.GetInputs()) {
            if (!input.GetIsFinalIndex() || !RebuildTag) {
                continue;
            }
            const bool sameTag = ReadSegmentRebuildTag(input.GetSrcDir()) == RebuildTag;
            if (!sameTag) {
                IsRebuildMode = true;
            }
        }

        LOG << TLOG_INFO << "IsRebuildMode = " << (IsRebuildMode ? "on" : "off");
    }

    // Prepares the task state used by the rest of Run():
    //  - loads the bundle for the configured bundle version;
    //  - reads the rebuild tag and decides on rebuild mode (in rebuild mode every input is marked as non-final);
    //  - initializes command extensions;
    //  - configures saving of preparates (SavedPreparates), the Wad delta mode and the shard merger options;
    //  - fills the list of preparates excluded from the main archive when all preparates are kept.
    // taskCfg supplies the bundle version, ForceRebuild and HeavyDataLumpsEnabled; the rest is read from Task.
    void TExtBuilderTask::Init(const NRtDoc::TBuilderConfig& taskCfg) {
        TTimerWrapper w(TimerLog, "TExtBuilderTask.Init");
        BundleVersion.ConstructInPlace(taskCfg.GetBundleVersion());
        Bundle = GetBundle(*BundleVersion, Task.GetModelsDir(), Task.GetOutput().GetTrgDir());
        //TODO(yrum, 20190225): refactor - use ShardOpts.OutputDir instead of GetTrgDir() everywhere in this class

        RebuildTag = LoadRebuildTag();
        LOG << TLOG_INFO << "RebuildTag = " << RebuildTag;

        InitRebuildMode();
        if (IsRebuildMode) {
            // In rebuild mode no input is treated as an already built final index
            for (auto& input : *Task.MutableInputs()) {
                input.SetIsFinalIndex(false);
            }
        }

        TJupiterCmds& commands = Bundle->Commands;
        for (TJupiterCmd& cmd : commands) {
            if (cmd.Extensions) {
                cmd.Extensions->Init(cmd, *BundleVersion);
            }
        }

        const bool keepAllPreparates = Task.GetConfig().GetForceKeepPreparates();
        auto& prepOpts = Bundle->SavedPreparates;
        prepOpts.KeepAll = keepAllPreparates;
        // Preparates must be saved if any command is rebuilt from them (i.e. is in Rebuild mode without a custom rebuild)
        prepOpts.Enabled = prepOpts.Enabled || keepAllPreparates || std::any_of(commands.begin(), commands.end(), [](const TJupiterCmd& cmd) {
                return cmd.Mode == NBundleMeta::TMode::Rebuild && (!cmd.Extensions || !cmd.Extensions->UsesCustomRebuild());
            });

        if (prepOpts.Enabled && !prepOpts.KeepAll && prepOpts.Filter.empty()) {
            Y_ASSERT(0); // possibly incorrect configuration
            // Nothing would be kept with an empty filter, so fall back to keeping everything
            prepOpts.KeepAll = true;
        }

        if (keepAllPreparates && taskCfg.GetForceRebuild()) {
            // Forced rebuild: every Wad merge command is rebuilt instead of merged
            for (TJupiterCmd& cmd : commands) {
                if (cmd.Mode == NBundleMeta::TMode::WadMerge || cmd.Mode == NBundleMeta::TMode::WadCustomMerge) {
                    cmd.Mode = NBundleMeta::TMode::Rebuild;
                }
            }
        }

        WadDeltaMode = keepAllPreparates || std::any_of(commands.begin(), commands.end(), [](const TJupiterCmd& cmd) {
                return cmd.Mode == NBundleMeta::TMode::WadMerge || cmd.Mode == NBundleMeta::TMode::WadCustomMerge;
            });

        ShardsOpts.OutputDir = Task.GetOutput().GetTrgDir();
        ShardsOpts.TmpDir = TFsPath(Task.GetOutput().GetTempDir());
        ShardsOpts.StatsDir = ShardsOpts.TmpDir;
        ShardsOpts.Shard = /*NMercury::FakeMercuryShard==*/"-";

        PrepsToExclude = {
            "attrs_keyinv",
            "erf_doc_lumps",
            "erf_global_lumps",
            "ext_info_arc",
            "host_attrs_doc_lumps",
            "host_attrs_global_lumps",
            "key_inv_sentence_lengths_doc_lumps",
            "linkann_data_doc_lumps",
            "linkann_sentence_lengths_doc_lumps",
        };
        // Bundle versions below 199 exclude a few more preparates
        if (BundleVersion->GetGlobalNo() < 199) {
            PrepsToExclude.push_back("calculated_attrs_with_keyinv_doc_chunk_mapping_single_chunk");
            PrepsToExclude.push_back("host_attrs");
            PrepsToExclude.push_back("walrus");
            PrepsToExclude.push_back("walrus_with_keyinv_doc_chunk_mapping_single_chunk");
        }

        HeavyDataLumpsEnabled = taskCfg.GetHeavyDataLumpsEnabled();
    }

    // Asks PudgeData to generate a new mapping for Task.
    // PudgeData receives a factory that creates a delta-mode local client writing portions into the given directory.
    void TExtBuilderTask::GenerateNewPudgeMapping() {
        TTimerWrapper w(TimerLog, "TExtBuilderTask.GenerateNewPudgeMapping");

        if (!Portions) {
            Portions = MakeIntrusive<NRtDoc::TBuilderPortionsConfig>("portions.");
            TPortionsBundle::RegisterPortionIds(*Portions);
        }

        auto makeClient = [this](const TString& outputDir) {
            auto pudgeClient = CreateLocalClient(Task, /*delta=*/true);
            pudgeClient->SetPortionsOutput(MakeIntrusive<NRtDoc::TPortionsOutput>(outputDir, Portions));
            return pudgeClient;
        };
        PudgeData.GenerateNewMapping(Task, makeClient);
    }

    // Debugging helper: forwards the given builder task to NFusion::DumpBuilderTask.
    void TExtBuilderTask::DebugDump(const NRtDoc::TBuilderTask& task) {
        NFusion::DumpBuilderTask(task);
    }

    void TExtBuilderTask::MergePreparates() {
        TTimerWrapper w(TimerLog, "TExtBuilderTask.MergePreparates");
        const TPrepArchiveConfig& optsFromBundle = Bundle->SavedPreparates;
        if (!optsFromBundle.Enabled)
            return;

        NRtDoc::TPrepArchiveConfig prepConfig;

        if (Bundle->SpecialCommands.contains("RepackPreparatesArchive") || optsFromBundle.KeepAll) {
            prepConfig.SetRepackMain(true);
            LOG << "RepackPreparatesArchive" << " enabled";
        }

        if (!optsFromBundle.KeepAll) {
            for (const auto& prepName : optsFromBundle.Filter) {
                *prepConfig.AddIncludeToMain() = prepName;
            }
        } else {
            //Temporary patch, for Quick2 devopsing (REFRESH-377)
            //Filter out these even if ForceKeepPreparates == true
            for (const TStringBuf prepId : PrepsToExclude) {
                *prepConfig.AddExcludeFromMain() = prepId;
            }
        }

        LOG << "MergePreparates";

        const auto prepFileName = NRtDoc::TBuilderFiles::MainPrepFile;
        NRtDoc::MergePrepFiles(prepFileName, Task, Task.GetOutput().GetTrgDir(), prepConfig);
    }

    // Lets the extensions of all non-Rebuild commands preprocess the task inputs and switches
    // RebuildOrMerge commands to Rebuild mode when their extension asks for it.
    // If the "TempFixPanther" special command is present, NFusion::FakePantherInfo is called
    // for the "index" prefix of every final-index input.
    //
    // allCommands is modified in place (cmd.Mode may change from a Wad merge mode to Rebuild).
    // Throws (Y_ENSURE) if a RebuildOrMerge command is not a Wad merge command or has no extensions.
    void TExtBuilderTask::MigrateIndexes(TJupiterCmds& allCommands, const TSpecialCmds& specCommands) {
        TTimerWrapper w(TimerLog, "TExtBuilderTask.MigrateIndexes");
        NRtDoc::IBuilderInputs inputs(Task);
        for (TJupiterCmd& cmd : allCommands) {
            if (cmd.Mode == NBundleMeta::TMode::Rebuild)
                continue; // for TMode::Rebuild commands, PreprocessInputs is called later

            const auto& extensions = cmd.Extensions;
            if (extensions) {
                LOG << "PreprocessInputs " << cmd.DisplayName;
                extensions->PreprocessInputs(inputs);
            }

            if (cmd.Deploy == NBundleMeta::TDeploy::RebuildOrMerge) {
                Y_ENSURE(cmd.Mode == NBundleMeta::TMode::WadMerge || cmd.Mode == NBundleMeta::TMode::WadCustomMerge, cmd.DisplayName << ": invalid Deploy option");
                Y_ENSURE(extensions, cmd.DisplayName << ": invalid Deploy option");
                if (extensions->ShouldSwitchToRebuild()) {
                    LOG << "SwitchToRebuild " << cmd.DisplayName;
                    // The command must really leave the Wad merge modes here: from now on it is
                    // skipped by the Wad merge stages and picked up by BuildFullIndexes instead.
                    cmd.Mode = NBundleMeta::TMode::Rebuild;
                }
            }
        }

        if (specCommands.contains("TempFixPanther")) {
            for (size_t i = 0; i < inputs.InputsSize(); ++i) {
                const auto& input = inputs.GetInputs(i);
                if (input.GetIsFinalIndex()) {
                    NFusion::FakePantherInfo(
                            TFsPath(input.GetSrcDir()) / "index",
                            (TString)fakeMercuryShard,
                            FillPantherOpts());
                }
            }
        }
    }

    void TExtBuilderTask::BuildFullIndexes(const TJupiterCmds& allCommands, const TSpecialCmds& specCommands) {
        if (!Portions) {
            Portions = MakeIntrusive<NRtDoc::TBuilderPortionsConfig>("portions.");
            TPortionsBundle::RegisterPortionIds(*Portions);
        }

        NRtDoc::IBuilderInputs inputs(Task);
        for (const TJupiterCmd& cmd : allCommands) {
            if (cmd.Mode != NBundleMeta::TMode::Rebuild)
                continue;
            //TODO(yrum, 20190227): UsesCustomRebuild is not needed?
            const auto& extensions = cmd.Extensions;
            if (extensions) {
                TTimerWrapper w(TimerLog, "PreprocessInputs." + cmd.DisplayName);
                LOG << "PreprocessInputs " << cmd.DisplayName;
                extensions->PreprocessInputs(inputs);
            }
        }

        NRtDoc::TBuilderLocalClientPtr mergedPrepReader;
        for (const TJupiterCmd& cmd : allCommands) {
            TTimerWrapper w(TimerLog, "BuildFullIndex." + cmd.DisplayName);
            if (cmd.Mode != NBundleMeta::TMode::Rebuild)
                continue;

            LOG << "BuildFullIndex " << cmd.DisplayName;

            const bool usesCustomRebuild = !cmd.Extensions ? false : cmd.Extensions->UsesCustomRebuild();

            Y_ENSURE(Bundle->SavedPreparates.Enabled || usesCustomRebuild);
            if (!mergedPrepReader) {
                mergedPrepReader = CreateLocalClient(Task, /*delta=*/false);
                mergedPrepReader->SetPortionsInput(Portions, inputs.GetDeltaDir());
                TPortionsBundle::AddDocIdSettersForStage(*mergedPrepReader, TPortionsBundle::EStage::Final);
            }

            NJupiter::TShardMergerOpts smOpts = ShardsOpts;
            smOpts.FilePrefix = cmd.OutputPrefix;
            if (BundleVersion->GetGlobalNo() >= 173) {
                smOpts.PrevIndexDir = PudgeData.GetShardsOptsPrevIndexDir();
            }
            cmd.Method(smOpts, *mergedPrepReader);
        }

        if (specCommands.contains("TempFixPanther")) {
            NFusion::FakePantherInfo(
                    TFsPath(Task.GetOutput().GetTrgDir()) / "index",
                    (TString)fakeMercuryShard,
                    FillPantherOpts());
        }
    }

    // Creates the portions client for a prepare stage:
    //  - delta && !hostData: a delta client over DeltaTask (initialized on demand);
    //  - delta && hostData:  a delta client over the main Task;
    //  - !delta:             a non-delta client over a copy of Task with every input marked as final.
    NRtDoc::TBuilderLocalClientPtr TExtBuilderTask::InitPreparePortions(bool delta, bool hostData) {
        if (delta) {
            if (hostData) {
                return CreatePortionsClient(Task, /*delta*/true);
            }
            InitDeltaTask();
            return CreatePortionsClient(DeltaTask, /*delta=*/true);
        }

        NRtDoc::TBuilderTask allFinalTask = Task;
        for (auto& input : *allFinalTask.MutableInputs()) {
            if (!input.GetIsFinalIndex()) {
                input.SetIsFinalIndex(true);
            }
        }
        return CreatePortionsClient(allFinalTask, /*delta=*/false);
    }

    void TExtBuilderTask::PreparePortions(const TMercuryCmds& allCommands, const TSpecialCmds& specCommands, bool delta) {
        if (!allCommands.empty()) {
            NRtDoc::TBuilderLocalClientPtr client = InitPreparePortions(delta, /*hostData=*/false);
            TPortionsBundle portions(client, Log, TimerLog);
            portions.Run(allCommands);
        }

        if (delta && specCommands.contains("ReduceHostAttrsByDocId")) {
            // host portions need to use final hostIds - MergerTask instead of DeltaTask
            const TFsPath indexAaDir = Task.GetOutput().GetTrgDir();
            NRtDoc::TBuilderLocalClientPtr specClient = InitPreparePortions(
                    /*delta=*/true,
                    /*hostData=*/true);

            TMercuryPrepCmd hostCmd{"ReduceHostAttrsByDocId", NRtDoc::CreateReduceHostAttrsByDocId(indexAaDir)};
            TPortionsBundle hostPortions(specClient, Log, TimerLog);
            hostPortions.RunTwice(hostCmd);
        }
    }


    void TExtBuilderTask::MergePortion(const TMercuryPrepCmd& command, NRtDoc::TSegmentHitRemapper::TPtr remapper) {
        Y_ENSURE(Portions);
        using namespace NRtDoc;

        TBuilderTask mergerTask = MakeMergerTask(Task, GetDeltaInput(Task));
        TBuilderLocalClientPtr client = CreatePortionsClient(mergerTask, /*delta=*/false);

        TPortionsBundle portions(client, Log, TimerLog);
        //TODO(REFRESH-383): replace this ad-hoc 'if' with something more understandable (invent special TBuilderStage for hit remappers?)
        if (remapper) {
            TPortionsBundle::AddDocIdSettersForStage(*client, TPortionsBundle::EStage::MergePortions);
        }
        constexpr bool keepDelta = false; // Will replace the delta data with the merged data (the only used mode)
        portions.MergePortions(command, remapper, Task.GetOutput().GetTrgDir(), keepDelta);
    }

    // Writes the url-to-shard stats portion for builderTask through a delta portions client
    // and publishes the resulting portion into the target dir of the main Task.
    void TExtBuilderTask::WriteShardMetadata(const NRtDoc::TBuilderTask& builderTask) {
        using namespace NRtDoc;

        TBuilderLocalClientPtr statsClient = CreatePortionsClient(builderTask, /*delta=*/true);

        const auto& docCountStat = builderTask.GetOutput().GetDocCountStat();
        TString statsPortion = TJupiterHelper::WriteUrlToShardStats(
                *statsClient,
                /*NMercury::FakeMercuryShard==*/"-",
                docCountStat);

        statsClient->Finish();
        statsClient->PublishSimplePortion(statsPortion, Task.GetOutput().GetTrgDir());
    }

    void TExtBuilderTask::ReduceMergedPortion(const TMercuryPrepCmd& command) {
        Y_ENSURE(Portions);
        using namespace NRtDoc;

        const TFsPath deltaDir = IBuilderInputs(Task).GetDeltaDir();
        const TFsPath finalDir = Task.GetOutput().GetTrgDir();
        const TFsPath tmpfsDir = Task.GetOutput().GetTmpfsDir();

        TBuilderTask rmTask;
        auto* input = rmTask.AddInputs();
        input->SetSrcDir(finalDir);
        input->SetIsFinalIndex(true);

        rmTask.MutableOutput()->SetTempDir(deltaDir);
        rmTask.MutableOutput()->SetTmpfsDir(tmpfsDir);

        TBuilderLocalClientPtr client = CreateReducePortionsClient(rmTask, finalDir);
        TPortionsBundle portions(client, Log, TimerLog);
        portions.ReduceMergedPortions(command, deltaDir);
    }

    // Initializes DeltaMapping and DeltaTask once; subsequent calls are no-ops
    // (a non-empty DeltaMapping is the "already initialized" marker).
    void TExtBuilderTask::InitDeltaTask() {
        if (DeltaMapping.empty()) {
            DeltaMapping = FormatDeltaMappingPath(Task);
            DeltaTask = ::NFusion::MakeDeltaSubtask(Task, DeltaMapping);
            // The delta subtask uses the same tmpfs dir as the main task
            DeltaTask.MutableOutput()->SetTmpfsDir(Task.GetOutput().GetTmpfsDir());
        }
    }

    void TExtBuilderTask::BuildWadDeltas(const TJupiterCmds& allCommands) {
        // called from BuildAndMergeWads
        Y_ASSERT(WadDeltaMode);
        Y_ENSURE(!DeltaMapping.empty());

        NRtDoc::TBuilderLocalClientPtr deltaPrepReader = CreateLocalClient(DeltaTask, /*delta=*/true);

        for (const TJupiterCmd& cmd : allCommands) {
            TTimerWrapper w(TimerLog, "TExtBuilderTask.BuildWadDeltas." + cmd.DisplayName);
            using NBundleMeta::TMode;
            if (cmd.Mode != TMode::WadMerge && cmd.Mode != TMode::WadCustomMerge && cmd.Mode != TMode::WadPudge)
                continue;

            Log << "BuildWadDelta " << cmd.DisplayName;

            NJupiter::TShardMergerOpts smOpts = ShardsOpts;
            smOpts.FilePrefix = cmd.OutputPrefix;
            smOpts.OutputDir = smOpts.TmpDir; // write to the temporary dir
            cmd.Method(smOpts, *deltaPrepReader);
        }
    }

    // Creates the Wad patcher for a command, or returns nullptr if the command needs none.
    // Only the "MergeErf" command gets a patcher: an Erf hit patcher backed by the categ map at
    // indexaaPrefix is added when the indexaa file exists there. A missing indexaa is an error
    // only if Asserts.MustHaveAa() is set.
    NRtDoc::IWadPatcher::TPtr TExtBuilderTask::CreateWadPatcher(const TJupiterCmd& command, const TString& indexaaPrefix) {
        //TODO(yrum): refactor to an enum instead
        if (command.DisplayName != "MergeErf") {
            return nullptr;
        }

        const bool hasIndexAa = NFs::Exists(indexaaPrefix + "aa") || NFs::Exists(indexaaPrefix + "aa.wad");

        THolder<TWadPatcher> patcher = MakeHolder<TWadPatcher>();
        if (Y_LIKELY(hasIndexAa)) {
            NRtDoc::TCategMap::TPtr categs = MakeIntrusive<NRtDoc::TCategMap>(indexaaPrefix);
            patcher->Add(MakeIntrusive<NRtDoc::TErfHitPatcher>(categs));
        } else {
            Y_ENSURE(!Asserts.MustHaveAa(), command.DisplayName << " expects indexaa at " << indexaaPrefix);
        }

        patcher->Init();
        return patcher.Release();
    }

    // Returns the merger for a Wad command: the custom merger supplied by the command extensions
    // if there is one, otherwise the default NRtDoc::TWadMerger.
    // Throws if a plain WadMerge command provides a custom merger.
    NRtDoc::IWadMerger::TPtr TExtBuilderTask::CreateWadMerger(const TJupiterCmd& command) {
        NRtDoc::IWadMerger::TPtr wadMerger;

        const auto& extensions = command.Extensions;
        if (extensions) {
            wadMerger = extensions->CreateWadCustomMerger();
            const bool customAllowed = command.Mode != NBundleMeta::TMode::WadMerge;
            Y_ENSURE(!wadMerger || customAllowed, "WadMerge commands are not allowed to have custom merge routines, try WadCustomMerge");
        }

        if (wadMerger) {
            return wadMerger;
        }

        wadMerger = new NRtDoc::TWadMerger();
        return wadMerger;
    }

    // Builds the task used for merging from builderTask.
    //
    // The output section is copied as is. The inputs are transformed as follows:
    //  - if deltaInput has SrcDir ("delta mode"): every final input is copied (SrcDir + SrcMapping)
    //    and the single run of consecutive non-final (prep) inputs is replaced with one non-final
    //    input taken from deltaInput;
    //  - otherwise (legacy mode): every input is copied and marked as final.
    //
    // Throws if the non-final inputs do not form one contiguous run in delta mode.
    NRtDoc::TBuilderTask TExtBuilderTask::MakeMergerTask(const NRtDoc::TBuilderTask& builderTask, const NRtDoc::TBuilderTask::TBuilderInput& deltaInput) {
        const bool mergerDeltaMode = deltaInput.HasSrcDir();
        bool hasAddedDelta = false;

        NRtDoc::TBuilderTask result;

        result.MutableOutput()->CopyFrom(builderTask.GetOutput());

        for (size_t i = 0; i < builderTask.InputsSize(); ++i) {
            const auto& input = builderTask.GetInputs(i);
            if (input.GetIsFinalIndex() || !mergerDeltaMode) {
                auto* item = result.AddInputs();
                item->SetSrcDir(input.GetSrcDir());
                item->SetSrcMapping(input.GetSrcMapping());
                item->SetIsFinalIndex(true); // means "isDelta == false"

                // Note: in the legacy !deltaMode, MakeMergerTask changes IsFinalIndex: false->true for the prep indexes.
                // TODO(yrum, 20190216): remove the legacy mode "!deltaMode"
            } else {
                Y_ENSURE(!hasAddedDelta, "incorrect Task for WadMerge: prep Inputs should be in a row");
                hasAddedDelta = true;
                // Skip the rest of the prep run, but stop ON its last element: the loop increment
                // then moves to the next (final) input instead of stepping over it.
                while (i + 1 < builderTask.InputsSize() && !builderTask.GetInputs(i + 1).GetIsFinalIndex()) {
                    ++i;
                }
                auto* item = result.AddInputs();
                item->SetSrcDir(deltaInput.GetSrcDir());
                item->SetSrcMapping(deltaInput.GetSrcMapping());
                item->SetIsFinalIndex(false); // means "isDelta == true"
            }
        }
        return result;
    }

    void TExtBuilderTask::MergeWads(const TJupiterCmds& allCommands) {
        // called from BuildAndMergeWads
        Y_ASSERT(WadDeltaMode);
        Y_ENSURE(NFs::Exists(DeltaMapping));

        using namespace NDoom;
        using namespace NRtDoc;
        using NBundleMeta::TMode;
        using NBundleMeta::TDeploy;
        using TBuilderInput = TBuilderTask::TBuilderInput;

        TFsPath outputDir(Task.GetOutput().GetTrgDir());

        // to remap hostid and domain id, the current implementation utilizes indexaa that is pre-built by the main merger. Will be changed in the future
        const TString indexaaPrefix = outputDir / "index";

        // Helps to configure reading from WadDelta (a helper)
        auto deltaInput = [](const TString& deltaDir, const TString& deltaMap) {
            //TODO(yrum): use GetDeltaInput() after alexbykov@ finally commits MergeGroupAttrs
            TBuilderInput input;
            input.SetSrcDir(deltaDir);
            input.SetSrcMapping(deltaMap);
            return input;
        };

        for (const TJupiterCmd& cmd : allCommands) {
            TTimerWrapper w(TimerLog, "TExtBuilderTask.MergeWads." + cmd.DisplayName);
            if (cmd.Mode != TMode::WadMerge && cmd.Mode != TMode::WadCustomMerge)
                continue;

            LOG << "MergeWad " << cmd.DisplayName;

            NRtDoc::IWadPatcher::TPtr wadPatcher = CreateWadPatcher(cmd, indexaaPrefix);
            NRtDoc::IWadMerger::TPtr merger = CreateWadMerger(cmd);

            const NRtDoc::TWadMergerMeta mergerMeta = merger->GetMeta();
            const bool deltaMode = mergerMeta.DeltaMode;

            const TBuilderTask mergerInputs = MakeMergerTask(Task, !deltaMode ? TBuilderInput() : deltaInput(Task.GetOutput().GetTempDir(), DeltaMapping));

            const TString wadName = ConstructIndexFileName(cmd.Extensions.Get(), cmd.OutputPrefix);
            const TFsPath wadPath = outputDir / wadName; // will be silently overwritten if exists

            merger->Init(wadPath);
            for (size_t i = 0; i < mergerInputs.InputsSize(); ++i) {
                const TBuilderTask::TBuilderInput& input = mergerInputs.GetInputs(i);
                const TFsPath srcDir(input.GetSrcDir());
                const TFsPath wadFileName = srcDir / wadName;
                const TString displayName = srcDir.Basename();
                bool isDeltaInput;
                auto docIdMap = MakeHolder<TDocIdMap>();

                const bool fileExists = wadFileName.Exists();
                if (input.GetIsFinalIndex()) {
                    // Add index_ directory to merger (prep_index_ is possible if !deltaMode)
                    Y_ENSURE(fileExists || cmd.Deploy != TDeploy::PanicIfMissing, cmd.DisplayName << "/WadMerge: file not found " << wadFileName);
                    if (!fileExists)
                        continue;
                    isDeltaInput = false;
                } else {
                    // Add delta directory to merger
                    isDeltaInput = true;
                    Y_ENSURE(wadFileName.Exists(), cmd.DisplayName << "/WadMerge: file not found " << wadFileName);
                }

                TDocIdMapIo::Load(&*docIdMap, input.GetSrcMapping());
                merger->Add(wadFileName, std::move(docIdMap), wadPatcher, displayName, isDeltaInput);
            }

            Y_ENSURE(!merger->Empty() || cmd.Deploy != TDeploy::PanicIfMissing, cmd.DisplayName << "/WadMerge: no input Wads for " << wadName);
            if (!merger->Empty()) {
                merger->Finish();
            }
        }
    }


    // In Wad delta mode: makes sure the delta task is initialized, builds the delta Wads
    // and merges them. Does nothing when WadDeltaMode is off.
    void TExtBuilderTask::BuildAndMergeWads(const TJupiterCmds& allCommands) {
        if (WadDeltaMode) {
            InitDeltaTask();
            BuildWadDeltas(allCommands);
            MergeWads(allCommands);
        }
    }

    // Placeholder for the WadPudge processing: fails on the first WadPudge command found,
    // otherwise does nothing.
    void TExtBuilderTask::PudgeWads(const TJupiterCmds& allCommands) {
        for (const TJupiterCmd& cmd : allCommands) {
            if (cmd.Mode == NBundleMeta::TMode::WadPudge) {
                Y_FAIL("not implemented");
            }
        }
    }

    // Returns the rebuild tag, which is taken from the MERGE_REBUILD_TAG environment variable.
    TString TExtBuilderTask::LoadRebuildTag() const {
        return GetEnv("MERGE_REBUILD_TAG");
    }

    void TExtBuilderTask::WriteRebuildTagIfNeed() const {
        if (!RebuildTag) {
            return;
        }
        const TString tagFileName = Task.GetOutput().GetTrgDir() + "/" + RebuildTagFileName;
        TFileOutput out(tagFileName);
        out << RebuildTag << '\n';
        out.Flush();
    }

    // Reads the rebuild tag (the first line of the tag file) of the segment located in `dir`.
    // Returns an empty string if the segment has no tag file.
    TString TExtBuilderTask::ReadSegmentRebuildTag(const TString& dir) const {
        const TString tagPath = dir + "/" + RebuildTagFileName;
        if (!NFs::Exists(tagPath)) {
            return TString();
        }

        TFileInput tagFile(tagPath);
        return tagFile.ReadLine();
    }

    void TExtBuilderTask::RunShardsPrepare() {
        LOG << TLOG_INFO << "RunShardsPrepare";
        TTimerWrapper w(TimerLog, "TExtBuilderTask.RunShardsPrepare");

        TVector<THolder<NJupiter::IMercuryCmd>> commands;
        commands.push_back(NJupiter::CreatePrepareStaticFeatures(/* no models */""));
        commands.push_back(NJupiter::CreateExternalRelevAttrsPrepare());
        commands.push_back(NJupiter::CreateHostAttrsPrepare());
        if (HeavyDataLumpsEnabled) {
            commands.push_back(NJupiter::CreateContentAttrsPrepare());
            commands.push_back(NJupiter::CreateUrlHeavyDataPrepare());
        } else {
            commands.push_back(NJupiter::CreateContentAttrsPrepareOld());
        }
        commands.push_back(NJupiter::CreateCalculatedAttrsPrepare());
        commands.push_back(NJupiter::CreatePrepareKeyInvSentenceLengths());
        commands.push_back(NJupiter::CreateBuildExtInfoArc(/* no models */""));
        if (HeavyDataLumpsEnabled) {
            commands.push_back(NJupiter::CreateCompressUrlTitleEmbeddingsToBatches());
        } else {
            commands.push_back(NJupiter::CreateCompressUrlTitleEmbeddingsToBatchesOld());
        }
        commands.push_back(NJupiter::CreateBuildAttrs());
        if (HeavyDataLumpsEnabled) {
            commands.push_back(NJupiter::CreatePrepareHnswLogDwellTimeBigrams());
        } else {
            commands.push_back(NJupiter::CreatePrepareHnswLogDwellTimeBigramsOld());
        }
        commands.push_back(NJupiter::CreateBuildGroupAttrsConfigs());
        commands.push_back(NJupiter::CreateBuildNavSrcStrict());

        TShardsPrepareLumpsFiller filler(commands, HeavyDataLumpsEnabled);

        for (const auto& input : Task.GetInputs()) {
            if (!input.GetIsFinalIndex()) {
                LOG << TLOG_INFO << "RunShardsPrepare for " << input.GetSrcDir();
                const TString prepFileName = NRtDoc::IBuilderInputs::GetPrefixUpdated(input);
                RunMercuryShardPrepareProcessors(filler, input.GetSrcDir() + "/prep.lumps", Task.GetOutput().GetTmpfsDir() + "/" + prepFileName, HeavyDataLumpsEnabled);
            }
        }

        LOG << TLOG_INFO << "RunShardsPrepare finished";
    }

    void TExtBuilderTask::RemovePreviouslyUpdatedPrepLumps() {
        for (const auto& input : Task.GetInputs()) {
            const TString prepFileName = NRtDoc::IBuilderInputs::GetPrefixUpdated(input);
            const TString prepFile = Task.GetOutput().GetTmpfsDir() + "/" + prepFileName;
            const TString prepDirFile = prepFile + ".dir";
            for (const auto& file : {prepFile, prepDirFile}) {
                if (NFs::Exists(file)) {
                    Y_ENSURE(NFs::Remove(file));
                }
            }
        }
    }

    // Synchronous run of the whole task: executes all stages in order on the calling thread.
    // Returns true on success. Returns false if the operation was cancelled
    // (NRtDoc::TOperationCancelledException) or if any other exception was thrown;
    // in the latter case the process is aborted via Y_VERIFY when Asserts.CrashOnMergeFail() is set.
    // The result parameter is not used.
    bool TExtBuilderTask::Run(NRtDoc::TBuilderTaskResult& /*result*/) {
        try {
            // Bundle is assigned in Init(), so a non-empty Bundle means the task has already been run
            Y_ENSURE(!Bundle);

            TTimerWrapper w(TimerLog, "TExtBuilderTask.Run");

            Init(*Task.MutableConfig());
            RemovePreviouslyUpdatedPrepLumps();

            // The Pudge mapping is generated only for bundle versions >= 173
            if (BundleVersion->GetGlobalNo() >= 173) {
                GenerateNewPudgeMapping();
            }

            WriteShardMetadata(Task);

            // shards_prepare runs for bundle versions >= 172 unless disabled in the task config
            if (BundleVersion->GetGlobalNo() >= 172 && !Task.GetConfig().GetDisableShardsPrepare()) {
                RunShardsPrepare();
            }

            // "Prior" commands: prepared in non-delta mode and built without special commands
            PreparePortions(Bundle->PriorPrepCommands, {}, false);
            BuildFullIndexes(Bundle->PriorCommands, {});

            // Main commands: MigrateIndexes may change command modes before the Wads are built and merged
            MigrateIndexes(Bundle->Commands, Bundle->SpecialCommands);
            PreparePortions(Bundle->PrepCommands, Bundle->SpecialCommands, /*isDelta=*/true);
            BuildAndMergeWads(Bundle->Commands);

            MergePreparates();
            MergePortions(Bundle->PrepCommands, Bundle->SpecialCommands);
            BuildFullIndexes(Bundle->Commands, Bundle->SpecialCommands);

            // "Late" commands are built and merged after the full indexes
            BuildAndMergeWads(Bundle->LateCommands);

            WriteRebuildTagIfNeed();

            LOG << TLOG_INFO << "TExtBuilderTask completed";
            return true;
        } catch (NRtDoc::TOperationCancelledException& e) {
            // Cancellation is reported with NOTICE priority and does not go through the crash check below
            LOG << TLOG_NOTICE << "TExtBuilderTask cancelled";
            return false;
        } catch (...) {
            LOG << TLOG_ERR << "TExtBuilderTask failed:" << CurrentExceptionMessage();
            // Aborts the process when Asserts.CrashOnMergeFail() is set
            Y_VERIFY(!Asserts.CrashOnMergeFail());

            return false;
        }
    }

    //TODO(yrum): a Dry Run per-commit test is needed (shardmerge already supports it)
}
