#pragma once

#include "wad_keyinv_merge.h"
#include "iter_heap.h"
#include <kernel/doom/key/key_decoder.h>
#include <kernel/doom/key/key_encoder.h>
#include <kernel/doom/offroad_doc_wad/offroad_factor_ann_doc_wad_io.h>
#include <kernel/doom/offroad_key_wad/offroad_key_wad_io.h>
#include <kernel/doom/adaptors/key_filtering_index_reader.h>
#include <kernel/doom/adaptors/multi_key_index_writer.h>
#include <kernel/doom/adaptors/encoding_index_writer.h>
#include <kernel/doom/standard_models_storage/standard_models_storage.h>
#include <library/cpp/logger/global/global.h>
#include <util/generic/vector.h>
#include <util/generic/array_ref.h>
#include <util/generic/algorithm.h>
#include <util/stream/str.h>


// Writes a DEBUG_LOG record with the current system resource usage, tagged with `Action`.
// NOTE: the expansion is a braced block followed by its own ';'. Used as the only statement
// of an unbraced `if` that has an `else`, it will not compile; wrap such uses in braces.
#define DBG_MEM_LOG(Action) \
        { DEBUG_LOG << "RESOURCES On " << Action << ": " << NLoggingImpl::GetSystemResources() << Endl; };

namespace NFusion {
    using namespace NDoom;

    // Ordering predicate over pointers to lemma iterators: the iterator whose current lemma is
    // greater is reported as "less". Presumably this makes TIteratorHeap surface the smallest
    // lemma first - confirm against TIteratorHeap.
    // Both pointers must be non-null and point to valid (not exhausted) iterators.
    template <typename TIteratorPtr>
    struct TLemmaComparer {
        bool operator()(const TIteratorPtr& lhs, const TIteratorPtr& rhs) const {
            Y_ASSERT(lhs && rhs && lhs->Valid() && rhs->Valid());
            return rhs->GetLemma() < lhs->GetLemma();
        }
    };

    template <typename TIterator>
    using TLemmaHeap = TIteratorHeap<TIterator, TLemmaComparer<TIterator*>>;

    using TTermIdRemap = THashMap<ui32, ui32>;

    // We need to sort here, because sorted encoded keys do not guarantee sorted lemmas (see REFRESH-297)
    class TSortedLemmasTable {
    public:
        using TKeyIo = TOffroadFactorAnnKeyWadIo;
        using TKey = TKeyIo::TReader::TKey;
        using TKeyData = TKeyIo::TReader::TKeyData;

        struct TItem {
            TDecodedKey Key;
            ui32 TermId;
        };


        static_assert(std::is_same<TKeyData, ui32>::value);

    private:
        const ui32 ChunkNo_;
        TVector<TItem> Keys_;
        TVector<ui32> AlphaSort_;
        TVector<ui32>::iterator Pos_;
        TTermIdRemap TermIdRemap_;

    public:
        TSortedLemmasTable(IWad* wad, ui32 chunkNo)
            : ChunkNo_(chunkNo)
        {
            Load(wad);
        };

        bool Valid() const {
            return Pos_ < AlphaSort_.end();
        }

        void Next() {
            ++Pos_;
        }

        TStringBuf GetLemma() const {
            return Keys_[*Pos_].Key.Lemma();
        }

        const TDecodedKey& GetKey() const {
            return Keys_[*Pos_].Key;
        }

        ui32 GetChunkNo() const {
            return ChunkNo_;
        }

        void SetNewTermId(ui32 newTermId) {
            const ui32 localTermId = Keys_[*Pos_].TermId;
            Y_ASSERT(!TermIdRemap_.contains(localTermId));
            TermIdRemap_[localTermId] = newTermId;
        }

    public:
        ui32 GetNewTermId(ui32 termId) const {
            Y_ASSERT(TermIdRemap_.contains(termId));
            return TermIdRemap_.at(termId);
        }

        THolder<TTermIdRemap> Finish() {
            {
                // we use swap() instead of clear(), because clear() does not deallocate
                TVector<TItem> devnull;
                devnull.swap(Keys_);
            }
            {
                TVector<ui32> devnull2;
                devnull2.swap(AlphaSort_);
            }
            THolder<TTermIdRemap> tmp = MakeHolder<TTermIdRemap>();
            tmp->swap(TermIdRemap_);
            return tmp;
        }
    private:
        void Load(IWad* wad) {
            TKeyDecoder decoder;
            TKeyIo::TReader reader(wad);
            Keys_.reserve(1024);
            Keys_.emplace_back();
            for (TStringBuf encodedKey; ; Keys_.emplace_back()) {
                TItem* item = &Keys_.back();
                if (!reader.ReadKey(&encodedKey, &item->TermId))
                    break;

                const bool parsed = decoder.Decode(encodedKey, &item->Key);
                if (Y_UNLIKELY(!parsed)) {
                    Keys_.pop_back();
                }
                if (Keys_.size() == Keys_.capacity()) {
                    Keys_.reserve(Keys_.size() + Min<size_t>(1 << 20, Keys_.size()));
                }
            }
            Keys_.pop_back();
            Keys_.shrink_to_fit();

            AlphaSort_.resize(Keys_.size());
            std::iota(AlphaSort_.begin(), AlphaSort_.end(), 0);

            const auto& keys = Keys_;
            Sort(AlphaSort_.begin(), AlphaSort_.end(), [&keys](ui32 a, ui32 b) {
                    return keys[a].Key.Lemma() < keys[b].Key.Lemma();
                    }
                );
            Pos_ = AlphaSort_.begin();
        }
    };

    // This code is (largely) adapted from TMultiKeyIndexWriter and may carry the known problems
    struct TOneKeyFormsMergingContext {
    public:
        using TKeyFormIndexHasher = NDoom::NPrivate::TKeyFormIndexHasher;
        using TKeyFormIndexesEqual = NDoom::NPrivate::TKeyFormIndexesEqual;

    public:
        TVector<TVector<ui32>> SourceKeysFormsMapping;

    public:
        TDecodedKey ResultKey;
        THashSet<size_t, TKeyFormIndexHasher, TKeyFormIndexesEqual> ResultKeyFormsSet;

    public:
        TOneKeyFormsMergingContext()
            : ResultKeyFormsSet(16, TKeyFormIndexHasher(ResultKey), TKeyFormIndexesEqual(ResultKey))
        {
        }

        void StartKey(const TStringBuf& lemma) {
            SourceKeysFormsMapping.clear();
            SourceKeysFormsMapping.reserve(5);
            ResultKey.Clear();
            ResultKey.SetLemma(lemma);
            ResultKeyFormsSet.clear();
        }
        void AddForms(ui32 chunkNo, const TDecodedKey& key) {
            if (chunkNo >= SourceKeysFormsMapping.size()) {
                SourceKeysFormsMapping.resize(chunkNo + 1);
            }

            TVector<ui32>& keyChunkFormsMapping = SourceKeysFormsMapping[chunkNo];
            keyChunkFormsMapping.resize(key.FormCount());
            TVector<ui32>::iterator pForm = keyChunkFormsMapping.begin();
            for (size_t i = 0; i < key.FormCount(); ++i) {
                ResultKey.AddForm(key.Form(i));
                size_t formIndex = ResultKey.FormCount() - 1;
                auto it = ResultKeyFormsSet.find(formIndex);
                if (it != ResultKeyFormsSet.end()) {
                    *pForm++ = *it;
                    ResultKey.PopBackForm();
                } else {
                    *pForm++ = formIndex;
                    ResultKeyFormsSet.insert(formIndex);
                }
            }
        }
    };

    class TFormsRemapData {
    public:
        using TArrayPos = ui32*;

        // View over one key block stored in the flat array.
        // Remap points at the first cell of the block; Hits points at the per-form counters.
        // The two cells right before Hits hold the key usage count and the final forms count.
        struct TKeyData {
            const TArrayPos Remap;
            const TArrayPos Hits;

            // Pointer to the key usage counter (two cells before Hits).
            Y_FORCE_INLINE TArrayPos UsageCount() const {
                return Hits - 2;
            }

            // Pointer to the number of per-form cells that follow (one cell before Hits).
            Y_FORCE_INLINE TArrayPos FinalFormsCount() const {
                return Hits - 1;
            }

            // The per-form cells as a mutable array of *FinalFormsCount() elements.
            Y_FORCE_INLINE TArrayRef<ui32> GetHitsMap() {
                return MakeArrayRef(Hits, *FinalFormsCount());
            }
        };

    private:
        THashMap<ui32, ui32> TermLookup;
        TVector<ui32> TermLookupFlat;
        TVector<ui32> Array;
        ui32 WriterPos = 0;
        ui32 MaxTermId = 0;

    public:
        // Returns the form list of chunk `chunkNo` inside the key block starting at `keyOff`.
        // Every list starts with a cell holding the distance to the next list, so reaching the
        // requested one means hopping over `chunkNo` lists.
        static inline TArrayPos GetFormsForChunk(TArrayPos keyOff, ui32 chunkNo) {
            TArrayPos cursor = keyOff + 1; // skip the block header cell
            while (chunkNo-- > 0) {
                cursor += *cursor;
            }
            return cursor;
        }

        // Maps form `formId` of chunk `chunkNo` to the form index in the merged key.
        // `keyOff` is the start of the key block (see GetKeyBlock).
        static inline ui32 GetForm(TArrayPos keyOff, ui32 chunkNo, ui32 formId) {
            const TArrayPos forms = GetFormsForChunk(keyOff, chunkNo);
            // forms[0] is the list length *including* the length cell itself (AddKey stores
            // size + 1), so valid form ids are [0, forms[0] - 1). A zero here would mean we ran
            // into the list terminator.
            const ui32 cellCount = forms[0];
            Y_ASSERT(cellCount > 0 && formId < cellCount - 1);
            return forms[formId + 1];
        }
    public:
        // Pre-allocates the lookup table and the flat array for about `hintTermsCount` keys.
        void Reserve(size_t hintTermsCount) {
            const size_t lookupCapacity = hintTermsCount + hintTermsCount / 2;
            const size_t arrayCapacity = hintTermsCount * 12;
            TermLookup.reserve(lookupCapacity);
            Array.reserve(arrayCapacity);
        }

        // Size in bytes of the flat lookup and the flat array (the hash lookup is not counted).
        size_t GetMemorySize() const {
            const size_t cells = TermLookupFlat.size() + Array.size();
            return sizeof(ui32) * cells;
        }

    public:
        // Renders the key block starting at `pos` as text:
        //   term=<id>; [[forms of chunk 0][forms of chunk 1]...];uc=<usage>;ffc=<count>;[cells]\t<lemma>
        static TString DebugDump(const TString& curLemma, ui32 termId, TArrayPos pos) {
            const TKeyData data{pos, pos + *pos};
            TArrayPos cursor = pos + 1; // skip the block header cell

            TStringStream ss;
            ss << "term=" << termId << "; [";

            // Per-chunk form lists; a zero length cell terminates the sequence.
            for (ui32 listLen = *cursor++; listLen != 0; listLen = *cursor++) {
                ss << "[";
                const ui32 formsInChunk = listLen - 1; // the length cell counts itself
                for (ui32 i = 0; i < formsInChunk; ++i) {
                    if (i != 0) {
                        ss << ",";
                    }
                    ss << *cursor++;
                }
                ss << "]";
            }

            Y_ASSERT(cursor == data.UsageCount());
            ss << "];uc=" << *cursor++;
            Y_ASSERT(cursor == data.FinalFormsCount());
            ss << ";ffc=" << *cursor++;

            const ui32 finalForms = *data.FinalFormsCount();
            if (finalForms != 0) {
                ss << ";[";
                for (ui32 i = 0; i < finalForms; ++i) {
                    if (i != 0) {
                        ss << ",";
                    }
                    ss << *cursor++;
                }
                ss << "]";
            }
            Y_ASSERT(cursor == data.Hits + *data.FinalFormsCount());

            ss << "\t" << curLemma;
            ss.Finish();
            return TString(ss.Str());
        }

        // Appends the block for key `finalTermId` to the flat array and registers it in the lookup.
        //
        // Block layout (ui32 cells):
        //   [0]            distance from the block start to the per-form counters
        //   per chunk      <list length incl. this cell> <merged form index>...
        //   0              list terminator
        //   usage count    (initialised to 0)
        //   final forms    number of forms in key.ResultKey
        //   counters       one zero-initialised cell per final form
        //
        // Returns a pointer to the block start. The pointer may be invalidated by a later AddKey,
        // because the array can be resized. Must not be called after FinalizeLookup() succeeded.
        TArrayPos AddKey(ui32 finalTermId, const TOneKeyFormsMergingContext& key) {
            Y_ASSERT(!IsFinalized());

            const ui32 finalFormCount = key.ResultKey.FormCount();

            // Exact number of cells this block needs: header + terminator + 2 stats + counters,
            // plus one list per chunk (an empty list is stored as a single bogus form).
            size_t blockSize = 1 + 1 + 2 + finalFormCount;
            for (const TVector<ui32>& keyChunkFormsMapping : key.SourceKeysFormsMapping) {
                blockSize += 1 + Max<size_t>(keyChunkFormsMapping.size(), 1);
            }

            // Keep the usual 64K cells of headroom, but never less than the block really needs;
            // a fixed headroom alone would let an oversized block run past the end of the array.
            const size_t minHeadroom = Max<size_t>(blockSize, 64 << 10);
            if (Array.size() < WriterPos + minHeadroom) {
                const size_t growStep = ClampVal<size_t>(Array.size() / 8, 256 * 1024, 16 * 1024 * 1024);
                Array.resize(Array.size() + Max<size_t>(growStep, minHeadroom));
            }

            if (Y_LIKELY(finalTermId > MaxTermId)) {
                MaxTermId = finalTermId;
            }

            Y_ASSERT(!TermLookup.contains(finalTermId));

            TermLookup[finalTermId] = WriterPos;
            TVector<ui32>::iterator p0 = Array.begin() + WriterPos;
            TVector<ui32>::iterator p = p0 + 1;
            for (const TVector<ui32>& keyChunkFormsMapping : key.SourceKeysFormsMapping) {
                ui32 formsCount = keyChunkFormsMapping.size();
                if (formsCount) {
                    *p++ = keyChunkFormsMapping.size() + 1;
                    p = std::copy(keyChunkFormsMapping.begin(), keyChunkFormsMapping.end(), p);
                } else {
                    // create a bogus remapping 0->0
                    *p++ = 1 + 1;
                    *p++ = 0;
                }
            }
            *p++ = 0; // list terminator
            TVector<ui32>::iterator stats = p;

            const ui32 keyUsageCount = 0;
            *stats++ = keyUsageCount; // key usage count
            *stats++ = finalFormCount;
            *p0 = stats - p0;

            Y_ASSERT(static_cast<size_t>(*p0) + finalFormCount == blockSize);

            // hit counts
            std::fill(stats, stats + finalFormCount, 0);
            WriterPos += *p0 + finalFormCount;

            Y_ASSERT(WriterPos <= Array.size());
            Y_ASSERT(finalFormCount == *GetKey(finalTermId).FinalFormsCount());

            return &*p0;
        }

        // Pointer to the first cell of the block stored for `finalTermId`.
        // Uses the flat lookup once it has been built, the hash lookup otherwise.
        TArrayPos GetKeyBlock(ui32 finalTermId) {
            if (TermLookupFlat.empty()) {
                const auto found = TermLookup.find(finalTermId);
                Y_ASSERT(found != TermLookup.end());
                return &Array[found->second];
            }

            Y_ASSERT(finalTermId < TermLookupFlat.size());
            const ui32 blockOffset = TermLookupFlat[finalTermId];
            Y_ASSERT(blockOffset != Max<ui32>());
            return &Array[blockOffset];
        }

        // Block of `finalTermId` as a TKeyData view; the first cell of the block holds the
        // distance to the per-form counters.
        Y_FORCE_INLINE TKeyData GetKey(ui32 finalTermId) {
            const TArrayPos block = GetKeyBlock(finalTermId);
            return TKeyData{block, block + *block};
        }

        void FinalizeLookup() {
            if (MaxTermId >= 2 * TermLookup.size()) {
                return; //too sparsed
            }

            Array.shrink_to_fit();
            TermLookupFlat.resize(MaxTermId + 1, Max<ui32>());
            for (const auto& kv : TermLookup) {
                Y_ASSERT(kv.first < TermLookupFlat.size());
                Y_ASSERT(kv.second < Array.size());
                TermLookupFlat[kv.first] = kv.second;
            }
            TermLookup.basic_clear();
        }

        Y_FORCE_INLINE bool IsFinalized() const{
            return !TermLookupFlat.empty();
        }
    };

    class TFormRemapBuilder {
    public:
        using TKeyIo = TOffroadFactorAnnKeyWadIo;
        using TIo = TOffroadFactorAnnDocWadIo;

        // One source chunk of the merge.
        struct TInput {
            // Source wad; both its keys and its hits are read from it.
            THolder<IWad> Wad;
            // Maps a source doc id to the merged doc id, or to TDocIdMap::DeletedDocument()
            // for documents that must be skipped.
            THolder<NRtDoc::TDocIdMap> DocIdMap;
        };

    private:
        TOneKeyFormsMergingContext KeyMerge;

    protected:
        TFormsRemapData Data;
        TVector<TDecodedKey> DecodedKeys;
        TVector<THolder<TTermIdRemap>> TermIdRemaps;
        TVector<std::pair<ui32, ui32>> KeyFreqSortTable;
        TVector<ui32> FinalTermIdRemap;

    public:
        // Pre-allocates the remap data and the per-term tables for about `hintTermsCount` terms.
        // DecodedKeys is intentionally left alone.
        void Reserve(size_t hintTermsCount) {
            Data.Reserve(hintTermsCount);
            FinalTermIdRemap.reserve(hintTermsCount);
            KeyFreqSortTable.reserve(hintTermsCount);
        }

    public:
        // Stores the key accumulated in KeyMerge under `termId`.
        // Term ids must be flushed densely and in order: termId == number of keys flushed so far.
        void FlushKey(const ui32 termId) {
            Y_ASSERT(DecodedKeys.size() == termId);
            Data.AddKey(termId, KeyMerge);
            // KeyMerge.ResultKey is left moved-from; StartKey() resets it before the next use.
            DecodedKeys.push_back(std::move(KeyMerge.ResultKey));
        }

        void MergeLemmas(const TVector<TInput>& inputs) {
            DBG_MEM_LOG("MergeLemmas load...");
            TVector<THolder<TSortedLemmasTable>> keyLists;
            ui32 chunkNo = 0;
            for (const TInput& input : inputs) {
                keyLists.push_back(MakeHolder<TSortedLemmasTable>(input.Wad.Get(), chunkNo++));
            }

            DBG_MEM_LOG("MergeLemmas load...OK");
            ::NFusion::TLemmaHeap<TSortedLemmasTable> heap;
            for (auto& src : keyLists) {
                heap.Add(src.Get());
            }

            TOneKeyFormsMergingContext key;
            TString lastLemma;
            ui32 termId = 0;
            for (; heap.Valid(); heap.Next()) {
                auto& head = *heap.GetHead();
                TStringBuf curLemma = head.GetLemma();
                if (curLemma.empty())
                    continue;
                if (lastLemma != curLemma) {
                    if (Y_LIKELY(!lastLemma.empty())) {
                        FlushKey(termId++);
                    }
                    lastLemma = curLemma;
                    KeyMerge.StartKey(curLemma);
                }
                KeyMerge.AddForms(head.GetChunkNo(), head.GetKey());
                head.SetNewTermId(termId);
            }
            if (!lastLemma.empty()) {
                FlushKey(termId++);
            }
            Data.FinalizeLookup();

            Y_ENSURE(DecodedKeys.size() == termId);

            DBG_MEM_LOG("MergeLemmas merged");
            for (auto& keyList : keyLists) {
                TermIdRemaps.emplace_back(keyList->Finish());
            }
            DBG_MEM_LOG("MergeLemmas detached");
            DEBUG_LOG << "MergeLemmas: TFormsRemapData nTerms=" << DecodedKeys.size() <<";memory_size=" << Data.GetMemorySize() << Endl;
            Reserve(termId);
        }

        void ReadHitsForStats(const TVector<TInput>& inputs) {
            DBG_MEM_LOG("ReadHitsForStats...");
            TVector<THolder<TIo::TReader>> readers;
            for (const auto& input : inputs) {
                readers.push_back(MakeHolder<TIo::TReader>(input.Wad.Get()));
            }

            ui32 chunkNo = 0;
            for (auto& rd : readers) {
                ui32 docId;
                const TTermIdRemap& termRemap = *TermIdRemaps[chunkNo];
                while (rd->ReadDoc(&docId)) {
                    if (inputs[chunkNo].DocIdMap->Map(docId) == NRtDoc::TDocIdMap::DeletedDocument()) {
                        continue;
                    }

                    TIo::TReader::THit hit;
                    while (rd->ReadHit(&hit)) {
                        const ui32 oldTermId = hit.DocId();
                        Y_ASSERT(termRemap.contains(oldTermId));
                        const ui32 termId = termRemap.at(oldTermId);
                        const auto data = Data.GetKey(termId);
                        (*data.UsageCount())++;

                        const ui32 finalFormsCount = *data.FinalFormsCount();
                        if (finalFormsCount) {
                            const ui32 formId = TFormsRemapData::GetForm(data.Remap, chunkNo, hit.Form());
                            Y_ASSERT(formId < *data.FinalFormsCount());
                            data.Hits[formId]++;
                        };
                    }
                }
                chunkNo++;
            }
            DBG_MEM_LOG("ReadHitsForStats...OK");
        }


        void RebuildKeys() {
            ui32 termId = 0;
            TVector<ui32> finalToTmpRenum;

            KeyFreqSortTable.resize(DecodedKeys.size());
            using TPair = std::pair<ui32, ui32>;
            TVector<TPair>::iterator p = KeyFreqSortTable.begin();
            for (ui32 tmpTermId = 0; tmpTermId < DecodedKeys.size(); ++tmpTermId) {
                *p++ = TPair(*Data.GetKey(tmpTermId).UsageCount(), tmpTermId);
            }

            Sort(KeyFreqSortTable.begin(), KeyFreqSortTable.end(), [](const TPair& a, const TPair& b) {
                        return a.first != b.first ? a.first > b.first : a.second < b.second;
                    });

            DBG_MEM_LOG("RebuildKeys...");
            for (auto& key : DecodedKeys) {
                TDecodedKey keyToWrite;
                const ui32 formCount = key.FormCount();
                finalToTmpRenum.resize(formCount);
                std::iota(finalToTmpRenum.begin(), finalToTmpRenum.end(), 0);
                auto data = Data.GetKey(termId++);
                auto hitStats = data.GetHitsMap();
                Y_ASSERT(hitStats.size() == formCount);

                auto pHitStats = hitStats.data();
                auto cmp = [&] (ui32 l, ui32 r) {
                    return pHitStats[l] != pHitStats[r] ? pHitStats[l] > pHitStats[r] : key.Form(l) < key.Form(r);
                };

                Sort(finalToTmpRenum.begin(), finalToTmpRenum.end(), cmp);

                ui32 newFormCount = formCount;
                for (ui32 finalFormId = 0; finalFormId < formCount; ++finalFormId) {
                    const ui32 tmpFormId = finalToTmpRenum[finalFormId];
                    if (Y_UNLIKELY(pHitStats[tmpFormId] == 0)) { // forms >= newFormId removed at merge
                        newFormCount = Min(newFormCount, finalFormId);
                    }
                }

                Y_ASSERT(std::all_of(finalToTmpRenum.begin() + newFormCount, finalToTmpRenum.end(), [&](ui32 newFormId) { return pHitStats[newFormId] == 0; }));

                // write final remap in place of hitstats
                auto pTmpToFinalRenum = pHitStats;
                for (ui32 finalFormId = 0; finalFormId < newFormCount; ++finalFormId) {
                    const ui32 tmpFormId = finalToTmpRenum[finalFormId];
                    pTmpToFinalRenum[tmpFormId] = finalFormId;
                }
                for (ui32 finalFormId = newFormCount; finalFormId < formCount; ++finalFormId) {
                    const ui32 tmpFormId = finalToTmpRenum[finalFormId];
                    pTmpToFinalRenum[tmpFormId] = Max<ui32>();
                }

                keyToWrite.Clear();
                keyToWrite.SetLemma(key.Lemma());
                for (ui32 finalFormId = 0; finalFormId < newFormCount; ++finalFormId) {
                    const ui32 tmpFormId = finalToTmpRenum[finalFormId];
                    Y_ASSERT(tmpFormId != Max<ui32>());
                    keyToWrite.AddForm(key.Form(tmpFormId));
                }
                key = std::move(keyToWrite);
            }
        }

        // Runs the three preparation stages in order; each stage relies on the results of the
        // previous one, so the order must not be changed.
        void BuildDecodings(const TVector<TInput>& inputs) {
            // Stage 1: merge keys by lemma and record the term id / form remappings
            MergeLemmas(inputs);
            DBG_MEM_LOG("MergeLemmas...OK");

            // Stage 2: read all hits and collect usage statistics for keys and forms
            ReadHitsForStats(inputs);
            DBG_MEM_LOG("ReadHitsForStats...OK");

            // Stage 3: rank the keys by usage, replace the hit counters with the final form
            // remap and rebuild the keys with their forms re-sorted
            RebuildKeys();
            DBG_MEM_LOG("RebuildKeys...OK");
        }
    };

    class TWadKeyInvMerger : public NRtDoc::IWadMerger, protected TFormRemapBuilder {
    public:
        using TInput = TFormRemapBuilder::TInput;

    public:
        TVector<TInput> Inputs;
        TString OutputFile;

    public:
        TWadKeyInvMerger()
        {
        }

        // NRtDoc::IWadMerger interface
        virtual void Init(const TString& outputFileName) override {
            OutputFile = outputFileName;
        }

        virtual void Add(const TString& wadPath, THolder<NRtDoc::TDocIdMap> docIdMap, NRtDoc::IWadPatcher::TPtr wadPatcher, const TString& /*displayName*/, bool /*isDelta*/) override {
            Y_ENSURE(!wadPatcher, "not supported");
            Inputs.emplace_back();
            Inputs.back().Wad = IWad::Open(wadPath, false);
            Inputs.back().DocIdMap = std::move(docIdMap);
        }

        virtual bool Empty() const override {
            return Inputs.empty();
        }

        virtual void Finish() override {
            Y_ENSURE(OutputFile); // Init() was called
            Y_ENSURE(!Empty());   // Add() was called

            THolder<IWadWriter> writer = MakeHolder<NDoom::TMegaWadWriter>(OutputFile);
            Merge(Inputs, writer.Get());
            writer->Finish();
        }

    private:
        // Debug helper: checks that walking `encodedKeys` in the order given by
        // `keyAlphaSortTable` yields strictly increasing, non-empty strings.
        // On the first violation (a key equal to or smaller than its predecessor) the offending
        // pair is reported to Cerr and false is returned. `decodedKeys` is optional and only
        // used to make the report more readable.
        static bool DebugCheckOrder(const TVector<ui32>& keyAlphaSortTable, const TVector<TString>& encodedKeys, const TVector<TDecodedKey>* decodedKeys) {
            const TString emptyString = "";
            const TString* prevEncodedKey = &emptyString;
            ui32 prevDecodedKeyN = Max<ui32>();
            for (ui32 tmpTermId : keyAlphaSortTable) {
                Y_ASSERT(tmpTermId != Max<ui32>());
                Y_ASSERT(tmpTermId < encodedKeys.size());

                const TString& encodedKey = encodedKeys[tmpTermId];
                Y_VERIFY(!encodedKey.empty());

                if (*prevEncodedKey >= encodedKey) {
                    TDecodedKey decodedKey;
                    if (decodedKeys != nullptr) {
                        decodedKey = (*decodedKeys)[tmpTermId];
                    }
                    Cerr << "ERROR identicalKeys: tmpTermId=" << tmpTermId << ";encodedKey=" << encodedKey << ";decodedKey="<< decodedKey << ";prevDecodedKey=";
                    if (prevDecodedKeyN != Max<ui32>() && decodedKeys) {
                        Cerr << (*decodedKeys)[prevDecodedKeyN] << Endl;
                    } else {
                        Cerr << "-" << Endl;
                    }
                    return false;
                }

                // Remember the current key: without this every key would only ever be compared
                // with the empty string and the check could never fail.
                prevEncodedKey = &encodedKey;
                prevDecodedKeyN = tmpTermId;
            }
            return true;
        }

        // Assigns final term ids (most used keys get the smallest ids, unused keys are dropped),
        // encodes the surviving keys and writes them to `output` in encoded-key order.
        // Fills FinalTermIdRemap (temporary term id -> final term id, Max<ui32>() for dropped
        // keys) and releases DecodedKeys.
        void WriteKeys(IWadWriter* output) {
            TKeyIo::TModel keyModel = TStandardIoModelsStorage::Model<TKeyIo::TModel>(TKeyIo::DefaultModel);
            TKeyIo::TWriter writer(keyModel, output);
            TKeyEncoder encoder; // we do not use TEncodingIndexWriter, because it silently "increments" bad keys instead of failing

            DBG_MEM_LOG("WriteKeys remap finalTermId...");
            // assign final termIds in the order of KeyFreqSortTable
            ui32 finalTermId = 0;
            FinalTermIdRemap.assign(DecodedKeys.size(), Max<ui32>());

            for (const auto& m : KeyFreqSortTable) {
                const ui32 usageCount = m.first;
                const ui32 tmpTermId = m.second;
                if (usageCount == 0) {
                    continue; // key removed at merge (not used)
                }
                FinalTermIdRemap[tmpTermId] = finalTermId++;
            }

            // Indexed by temporary term id; entries of dropped keys stay empty.
            TVector<TString> encodedKeys;
            encodedKeys.resize(DecodedKeys.size());

            // sort the encoded keys (we need to sort, despite the fact that they are already sorted by lemmas. This is because the Encoder adds prefixes sometimes);
            TVector<ui32> keyAlphaSortTable;
            keyAlphaSortTable.reserve(finalTermId);
            for (ui32 tmpTermId = 0; tmpTermId < DecodedKeys.size(); ++tmpTermId) {
                if (FinalTermIdRemap[tmpTermId] == Max<ui32>()) {
                    continue;
                }
                keyAlphaSortTable.push_back(tmpTermId);

                const auto& decodedKey = DecodedKeys[tmpTermId];
                Y_ASSERT(decodedKey.Lemma());

                const bool encoded = encoder.Encode(decodedKey, &encodedKeys[tmpTermId]);
                Y_VERIFY(encoded);
            }

            Sort(keyAlphaSortTable.begin(), keyAlphaSortTable.end(), [&encodedKeys](ui32 a, ui32 b) {
                        return encodedKeys[a] < encodedKeys[b];
                    });

            DBG_MEM_LOG("WriteKeys remap finalTermId...OK");
            Y_ASSERT(keyAlphaSortTable.size() == finalTermId);
            Y_ASSERT(DebugCheckOrder(keyAlphaSortTable, encodedKeys, &DecodedKeys));
            {
                // The decoded keys are not needed any more; swap (not clear) to free the memory.
                TVector<TDecodedKey> tmp;
                tmp.swap(DecodedKeys);
            }

            DBG_MEM_LOG("WriteKeys WriteKey...");
            // encodedKeys -> Wad
            for (ui32 tmpTermId : keyAlphaSortTable) {
                const TString& encodedKey = encodedKeys[tmpTermId];
                writer.WriteKey(encodedKey, FinalTermIdRemap[tmpTermId]);
            }
            writer.Finish();
        }

        // Copies the hits of all non-deleted documents to `output`, rewriting every hit's term id
        // and form id to their final values. Documents are written chunk by chunk, in the order
        // they are read; the hits of one document are re-sorted before being written.
        void WriteHits(const TVector<TInput>& inputs, IWadWriter* output) {
            TIo::TModel hitModel = TStandardIoModelsStorage::Model<TIo::TModel>(TIo::DefaultModel);
            TIo::TWriter writer(hitModel, output);

            DBG_MEM_LOG("WriteHits...");
            // Hits of the current document. We need sorting, because the form remapping sometimes
            // breaks the order. The buffer is reused across documents to avoid reallocations.
            TVector<TIo::TReader::THit> sortedHits;
            for (ui32 chunkNo = 0; chunkNo < inputs.size(); ++chunkNo) {
                auto rd = MakeHolder<TIo::TReader>(inputs[chunkNo].Wad.Get());
                const NRtDoc::TDocIdMap& docIdMap = *inputs[chunkNo].DocIdMap;
                const TTermIdRemap& termRemap = *TermIdRemaps[chunkNo];
                ui32 docId;
                while (rd->ReadDoc(&docId)) {
                    ui32 newDocId = docIdMap.Map(docId);
                    if (newDocId == NRtDoc::TDocIdMap::DeletedDocument()) {
                        continue;
                    }

                    sortedHits.clear();
                    TIo::TReader::THit hit;
                    while (rd->ReadHit(&hit)) {
                        // The hit's DocId() field carries the term id.
                        const ui32 oldTermId = hit.DocId();
                        Y_ASSERT(termRemap.contains(oldTermId));
                        const ui32 tmpTermId = termRemap.at(oldTermId);
                        Y_ASSERT(tmpTermId < FinalTermIdRemap.size());
                        const ui32 finalTermId = FinalTermIdRemap[tmpTermId];
                        Y_ASSERT(finalTermId != Max<ui32>() /* key should not be removed at merge */);
                        hit.SetDocId(finalTermId);

                        const auto data = Data.GetKey(tmpTermId);

                        // Check the counter itself, not the (never null) pointer to it.
                        Y_ASSERT(*data.UsageCount() != 0 /* Key is not marked as unused */);
                        if (*data.FinalFormsCount() > 0) {
                            const ui32 tmpFormId = TFormsRemapData::GetForm(data.Remap, chunkNo, hit.Form());
                            Y_ASSERT(tmpFormId < *data.FinalFormsCount());
                            // After RebuildKeys() the per-form cells hold the final form ids.
                            const ui32 finalFormId = data.Hits[tmpFormId];
                            Y_ASSERT(finalFormId != Max<ui32>() /* Form is not removed at merge */);
                            hit.SetForm(finalFormId);
                        } else {
                            // tmpFormId may be not zero here if TKeyDecoder() + TKeyEncoder() converted 1-form key to 0 form.
                            hit.SetForm(0);
                        }

                        sortedHits.push_back(hit);
                    }

                    Sort(sortedHits.begin(), sortedHits.end()); // usually no changes

                    for (auto&& sortedHit : sortedHits) {
                        writer.WriteHit(sortedHit);
                    }

                    writer.WriteDoc(newDocId);
                }
            }
            writer.Finish();
        }

        // Full merge pipeline: prepare the remappings, then write the keys and the hits to
        // `output`. WriteHits() relies on the final term ids assigned by WriteKeys(), so the
        // order of the calls matters.
        virtual void Merge(const TVector<TInput>& inputs, IWadWriter* output) {
            BuildDecodings(inputs);

            WriteKeys(output);
            DBG_MEM_LOG("WriteKeys...OK");

            WriteHits(inputs, output);
            DBG_MEM_LOG("WriteHits...OK");
        }
    };
}
