#include "panther_dssm_docid.h"

#include <robot/jupiter/library/rtdoc/interface/mercury.h>
#include <robot/jupiter/library/rtdoc/file/docidmap_io.h>


#include <robot/jupiter/library/tables/shards_prepare.h>
#include <robot/jupiter/protos/panther_dssm_doc.pb.h>

#include <mapreduce/yt/interface/client.h>

namespace NFusion {

    using NProtoBuf::Message;
    using NRtDoc::TDocIdMap;
    using NJupiter::TPantherDssmDocsBatch;
    using NJupiter::TPantherDssmDoc;

    /// Mapper that rewrites the doc ids stored in Panther DSSM doc batches.
    ///
    /// A doc id map is loaded from the file passed to the constructor and inverted in Start().
    /// In Do() every doc id is passed through that inverted map. Docs that carry no doc id, or
    /// whose id maps to TDocIdMap::DeletedDocument(), are dropped; a batch that ends up with no
    /// docs is not written to the output at all.
    class TPantherDssmDocIdsRemapper: public NYT::IMapper<NYT::TTableReader<TPantherDssmDocsBatch>, NYT::TTableWriter<Message>> {
    private:
        const TFsPath DeltaToFinalMapFile;             // file the forward map is loaded from
        THolder<NRtDoc::TDocIdMap> FinalToDeltaRemap;  // inverse of the loaded map; built in Start()

    public:
        /// @param remappingFile path to the doc id map file; must be a defined path.
        TPantherDssmDocIdsRemapper(const TFsPath& remappingFile)
            : DeltaToFinalMapFile(remappingFile)
        {
            Y_ENSURE(DeltaToFinalMapFile.IsDefined());
        }

        /// Loads the map from DeltaToFinalMapFile and stores its inverse for use in Do().
        void Start(NYT::TTableWriter<Message>* /*output*/) override {
            DeltaToFinalMapFile.CheckExists();

            TDocIdMap forwardMap;
            NRtDoc::TDocIdMapIo::Load(&forwardMap, DeltaToFinalMapFile);

            FinalToDeltaRemap = MakeHolder<TDocIdMap>();
            FinalToDeltaRemap->SetInvertedMap(forwardMap);
        }

        /// Remaps every input batch and emits the non-empty results to output table 0.
        void Do(NYT::TTableReader<TPantherDssmDocsBatch>* input, NYT::TTableWriter<Message>* output) override {
            Y_ASSERT(!!FinalToDeltaRemap);

            while (input->IsValid()) {
                TPantherDssmDocsBatch remapped;
                const bool hasDocsLeft = RemapBatch(input->GetRow(), remapped);
                if (Y_LIKELY(hasDocsLeft)) {
                    output->AddRow(remapped, 0);
                }
                input->Next();
            }
        }

    private:
        /// Copies `batch` into `remapped` with doc ids translated through FinalToDeltaRemap.
        ///
        /// @return true when `remapped` holds at least one doc and should be emitted;
        ///         false when the batch has no docs field or every doc was dropped.
        bool RemapBatch(const TPantherDssmDocsBatch& batch, TPantherDssmDocsBatch& remapped) const {
            // Debug-only expectation: a well-formed row carries all three fields.
            Y_ASSERT(batch.HasDocs() && batch.HasShard() && batch.HasBatch());

            // Release builds tolerate incomplete rows: no docs means nothing to emit.
            if (Y_UNLIKELY(!batch.HasDocs())) {
                return false;
            }

            if (batch.HasShard()) {
                remapped.SetShard(batch.GetShard());
            }
            if (batch.HasBatch()) {
                remapped.SetBatch(batch.GetBatch());
            }

            NJupiter::TPantherDssmDocsList& keptDocs = *remapped.MutableDocs();
            const TDocIdMap& backMap = *FinalToDeltaRemap;

            for (const TPantherDssmDoc& doc: batch.GetDocs().get_arr_docs()) {
                if (Y_UNLIKELY(!doc.HasDocId())) {
                    continue;
                }

                const ui32 mappedDocId = backMap.Map(doc.GetDocId());
                if (mappedDocId != TDocIdMap::DeletedDocument()) {
                    // Keep the whole doc payload, replacing only its id.
                    TPantherDssmDoc* kept = keptDocs.AddDocs();
                    kept->CopyFrom(doc);
                    kept->SetDocId(mappedDocId);
                }
            }

            return keptDocs.DocsSize() > 0;
        }
    };

    /// Builds a Mercury command that runs TPantherDssmDocIdsRemapper as a map operation.
    ///
    /// The Panther DSSM terms table of bucket 0 (taken from the clientless
    /// TShardsPrepareTables set) is used both as the input and as the output of the map.
    ///
    /// @param remappingFile path to the doc id map file, forwarded to the mapper.
    /// @return the command produced by the builder.
    NJupiter::TMercuryCmdPtr CreateFilterPantherDssmByDoc(const TFsPath& remappingFile) {
        const auto shardTables = NJupiter::TShardsPrepareTables::Clientless();
        const ui32 bucket = 0;

        return NJupiter::TMercuryCmdBuilder()
            .Map<TPantherDssmDocIdsRemapper>(remappingFile)
            .Input(shardTables.GetPantherDssmTermsTable(bucket))
            .Output(shardTables.GetPantherDssmTermsTable(bucket))
            .Release();
    }

}
