#include "multipart.h"
#include "multipart_ctx.h"
#include "merger.h"
#include <kernel/multipart_archive/multipart.h>
#include <library/cpp/logger/global/global.h>
#include <library/cpp/testing/unittest/registar.h>

using namespace NRTYArchive;

Y_UNIT_TEST_SUITE(TMultipartArchiveMergerSuite) {
    // Sets up the global log with the "console" backend, unless a global log
    // has already been initialized (so every test may call this safely).
    void InitLog() {
        if (GlobalLogInitialized()) {
            return;
        }
        // NOTE(review): 7 is presumably the most verbose log priority -- confirm.
        DoInitGlobalLog("console", 7, false, false);
    }
    void Clear(const TFsPath& archive) {
        TVector<TFsPath> children;
        archive.Parent().List(children);
        for (auto& f : children)
        if (f.GetName().StartsWith(archive.GetName()))
            f.ForceDelete();

    }

    class TTestDecoderSplit : public IRTYMergerDocIdDecoder {
    public:
        TTestDecoderSplit(ui32 size)
            : HalfSize(size / 2)
        {}

        TRTYMergerAddress Decode(ui32 /*clusterId*/, ui32 docId) const override {
            ui32 rDocid = 2 * HalfSize - docId - 1;
            return TRTYMergerAddress( rDocid / HalfSize, rDocid % HalfSize);
        }
        bool Check(ui32 /*clusterId*/, ui32 /*docId*/) const override {
            return true;
        }
        ui32 GetSizeOfCluster(ui32 clusterId) const override {
            return clusterId ? 0 : 2 * HalfSize;
        }

        ui32 GetSize() const override {
            return 0;
        }
        bool IsValidClusterId(ui32 /*clusterId*/) const override {
            return true;
        }
        ui32 GetNewDocsCount(ui32 /*clusterId*/) const override {
            return HalfSize;
        }
        bool NewToOld(ui32 clusterId, ui32 docId, TRTYMergerAddress& addr) const override {
            if (clusterId >= 2 || docId >= HalfSize)
                return false;
            addr.ClusterId = 0;
            ui32 rDocid = clusterId * HalfSize + docId;
            addr.DocId = 2 * HalfSize - rDocid - 1;
            return true;
        }
        void PatchDestMap(ui32 /*clusterId*/, const TVector<ui32>& /*remap*/) override {
        }
    private:
        ui32 HalfSize;
    };

    class TTestDecoderFusion : public IRTYMergerDocIdDecoder {
    public:
        TTestDecoderFusion(ui32 size)
            : HalfSize(size / 2) {
        }

        TRTYMergerAddress Decode(ui32 clusterId, ui32 docId) const override {
            ui32 rDocid = clusterId * HalfSize + docId;
            ui32 docid = 2 * HalfSize - rDocid - 1;
            return TRTYMergerAddress(0, docid);
        }
        bool Check(ui32 /*clusterId*/, ui32 /*docId*/) const override {
            return true;
        }
        ui32 GetSizeOfCluster(ui32 clusterId) const override {
            return clusterId < 2 ? 2 * HalfSize : 0;
        }

        ui32 GetSize() const override {
            return 0;
        }
        bool IsValidClusterId(ui32 /*clusterId*/) const override {
            return true;
        }
        ui32 GetNewDocsCount(ui32 clusterId) const override {
            if (!clusterId || clusterId == Max<ui32>())
                    return 2 * HalfSize;
            return 0;
        }
        bool NewToOld(ui32 clusterId, ui32 docId, TRTYMergerAddress& addr) const override {
            if (clusterId || docId >= 2 * HalfSize)
                return false;
            ui32 rDocid = 2 * HalfSize - docId - 1;
            addr.ClusterId = rDocid / HalfSize;
            addr.DocId = rDocid % HalfSize;
            return true;
        }
        void PatchDestMap(ui32 /*clusterId*/, const TVector<ui32>& /*remap*/) override {
        }
    private:
        ui32 HalfSize;
    };

    // Round trip through the merger: one 12-document archive "src" is split
    // into "dst1"/"dst2" (6 documents each), which are then fused back into
    // "dst". Document N stores the text "N", so every step can be checked
    // against the decoders' NewToOld mapping.
    Y_UNIT_TEST(TestMerge) {
        InitLog();
        Clear("src");
        Clear("dst1");
        Clear("dst2");
        Clear("dst");
        TMultipartConfig config;

        // Write documents 0..11 and flush after every fourth one
        // (i.e. after docids 3, 7 and 11).
        TArchiveOwner::TPtr archive = TArchiveOwner::Create("src", config, 12);
        for (ui32 docid = 0; docid < 12; ++docid) {
            archive->PutDocument(TBlob::FromString(ToString(docid)), docid);
            if (docid % 4 == 3) {
                archive->Flush();
            }
        }

        TVector<TString> srcs(1, "src");
        TVector<TString> dsts({"dst1", "dst2"});
        TTestDecoderSplit decoderS(12);
        TTestDecoderFusion decoderF(12);
        TRTYMerger merger(nullptr, TRTYMerger::otMpArch);

        // Phase 1: split "src" into "dst1" and "dst2".
        TRTYMerger::TContext ctxS(srcs, dsts, "", &decoderS, nullptr);
        ctxS.MultipartArcFile = "";
        ctxS.MultipartMergerContext = new TMultipartMergerContext(config);
        merger.MergeIndicies(ctxS);

        // Each destination document must contain the text of the source docid
        // it originated from.
        for (ui32 cluster = 0; cluster < dsts.size(); ++cluster) {
            TArchiveOwner::TPtr dstArchive(TArchiveOwner::Create(dsts[cluster], config));
            for (ui32 docid = 0; docid < 6; ++docid) {
                TBlob blob = dstArchive->GetDocument(docid);
                TStringBuf text(blob.AsCharPtr(), blob.Size());
                TRTYMergerAddress origin;
                UNIT_ASSERT(decoderS.NewToOld(cluster, docid, origin));
                UNIT_ASSERT_EQUAL(text, ToString(origin.DocId));
            }
        }

        // Phase 2: fuse "dst1" and "dst2" back into a single archive "dst".
        TRTYMerger::TContext ctxF(dsts, TVector<TString>(1, "dst"), "", &decoderF, nullptr);
        ctxF.MultipartArcFile = "";
        ctxF.MultipartMergerContext = new TMultipartMergerContext(config);
        merger.MergeIndicies(ctxF);

        // The two reversals cancel out: document N of "dst" must hold "N".
        TArchiveOwner::TPtr archiveR(TArchiveOwner::Create("dst", config));
        for (ui32 docid = 0; docid < 12; ++docid) {
            TBlob blob = archiveR->GetDocument(docid);
            TStringBuf text(blob.AsCharPtr(), blob.Size());
            TRTYMergerAddress origin;
            UNIT_ASSERT(decoderF.NewToOld(0, docid, origin));
            UNIT_ASSERT(decoderS.NewToOld(origin.ClusterId, origin.DocId, origin));
            UNIT_ASSERT_EQUAL(origin.DocId, docid);
            UNIT_ASSERT_EQUAL(text, ToString(docid));
        }
    }

    // Fuses two RAW archives ("src1", "src2") into "dst" while the merger runs
    // with an IArchivePart::DATALESS configuration, then reads "dst" back with
    // the regular RAW configuration to verify the document contents.
    Y_UNIT_TEST(TestDatalessMerge) {
        InitLog();
        Clear("src1");
        Clear("src2");
        Clear("dst");

        TMultipartConfig config;
        config.ReadContextDataAccessType = IDataAccessor::DIRECT_FILE;
        config.Compression = IArchivePart::RAW;

        TVector<TString> srcs;
        srcs.push_back("src1");
        srcs.push_back("src2");

        // Populate the sources: document N (text "N") is placed at the address
        // the split decoder assigns to it. The archives are scoped so that
        // they are released before the merge starts.
        {
            TTestDecoderSplit decoderS(12);
            TArchiveOwner::TPtr archives[2] = {
                TArchiveOwner::Create(srcs[0], config, 12),
                TArchiveOwner::Create(srcs[1], config, 12)
            };
            for (ui32 docid = 0; docid < 12; ++docid) {
                const TRTYMergerAddress target = decoderS.Decode(0, docid);
                archives[target.ClusterId]->PutDocument(TBlob::FromString(ToString(docid)), target.DocId);
            }
        }

        // Merge using the IArchivePart::DATALESS part implementation.
        TVector<TString> dsts(1, "dst");
        TTestDecoderFusion decoderF(12);
        TRTYMerger merger(nullptr, TRTYMerger::otMpArch);
        TMultipartConfig configDataless;
        configDataless.ReadContextDataAccessType = IDataAccessor::DIRECT_FILE;
        configDataless.Compression = IArchivePart::DATALESS;
        TRTYMerger::TContext ctxF(srcs, dsts, "", &decoderF, nullptr);
        ctxF.MultipartArcFile = "";
        ctxF.MultipartMergerContext = new TMultipartMergerContext(configDataless, true);
        merger.MergeIndicies(ctxF);

        // Read back through the regular IArchivePart::RAW part implementation
        // to validate the document contents.
        TArchiveOwner::TPtr archiveR(TArchiveOwner::Create("dst", config));
        for (ui32 docid = 0; docid < 12; ++docid) {
            TBlob blob = archiveR->GetDocument(docid);
            TStringBuf text(blob.AsCharPtr(), blob.Size());
            UNIT_ASSERT_EQUAL(text, ToString(docid));
        }
    }

   // Writes 10000-byte documents into a COMPRESSED archive until their total
   // size reaches at least 64 KiB, then checks that both random access and the
   // archive iterator return every document unchanged.
   Y_UNIT_TEST(TestCompressed) {
        InitLog();
        Clear("src");
        TMultipartConfig config;
        config.Compression = IArchivePart::COMPRESSED;

        // Document k consists of 10000 copies of the letter 'A' + k % 26.
        const ui32 minTotalSize = 1 << 16;
        TVector<TString> docs;
        ui32 totalSize = 0;
        while (totalSize < minTotalSize) {
            const char fill = static_cast<char>('A' + docs.size() % 26);
            docs.push_back(TString(10000, fill));
            totalSize += docs.back().size();
        }

        TArchiveOwner::TPtr archive = TArchiveOwner::Create("src", config, docs.size());
        for (ui32 docid = 0; docid < docs.size(); ++docid) {
            const TString& body = docs[docid];
            archive->PutDocument(TBlob::NoCopy(body.data(), body.size()), docid);
        }

        // Random access by docid.
        for (ui32 docid = 0; docid < docs.size(); ++docid) {
            TBlob stored = archive->GetDocument(docid);
            UNIT_ASSERT_EQUAL(stored.Size(), docs[docid].size());
            UNIT_ASSERT_EQUAL(memcmp(stored.AsCharPtr(), docs[docid].data(), stored.Size()), 0);
        }

        // Sequential access: every docid must be visited and match its source.
        TSet<ui32> seenDocids;
        for (auto iter = archive->CreateIterator(); iter->IsValid(); iter->Next()) {
            TBlob stored = iter->GetDocument();
            ui32 docid = iter->GetDocid();
            UNIT_ASSERT_EQUAL(stored.Size(), docs[docid].size());
            UNIT_ASSERT_EQUAL(memcmp(stored.AsCharPtr(), docs[docid].data(), stored.Size()), 0);
            seenDocids.insert(iter->GetDocid());
        }
        UNIT_ASSERT_EQUAL(seenDocids.size(), docs.size());
        UNIT_ASSERT_EQUAL(*seenDocids.rbegin(), docs.size() - 1);
    }
}
