#pragma once
#include <crypta/graph/mrcc_opt/proto/messages.pb.h>
#include <crypta/graph/mrcc_opt/lib/data.h>
#include <crypta/graph/mrcc_opt/lib/yt.h>
#include <crypta/graph/mrcc_opt/lib/time.h>
#include <crypta/graph/mrcc_opt/lib/operations.h>
#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/interface/operation.h>
#include <mapreduce/yt/util/ypath_join.h>
#include <util/folder/path.h>
#include <util/generic/hash.h>
#include <util/generic/hash_set.h>
#include <util/generic/vector.h>
#include <util/string/join.h>
#include <util/string/cast.h>
#include <mapreduce/yt/common/config.h>
#include <iostream>


namespace NConnectedComponents {
// Binary size units in bytes; each unit is 1024 times the previous one.
constexpr uint64_t KB = 1024ull;
constexpr uint64_t MB = 1024ull * KB;
constexpr uint64_t GB = 1024ull * MB;


// Returns the value of the "@row_count" attribute of the node at `path`,
// or 0 when the node does not exist.
//
// Declared `inline` because this header defines the function: without it,
// every translation unit that includes the header would emit its own external
// definition and the program would fail to link (ODR violation).
inline ui64 GetRowCount(NYT::IClientBasePtr client, const TString& path) {
    if (!client->Exists(path)) {
        return 0;
    }
    // The attribute is read as a signed 64-bit node value; make the conversion
    // to the unsigned return type explicit.
    return static_cast<ui64>(client->Get(path + "/@row_count").AsInt64());
}

// Replaces the node at `dirPath` with a fresh, empty map node.
//
// Any existing node at that path is removed recursively first, so previous
// content is lost. Missing parent nodes are created (Recursive(true)).
//
// Declared `inline` because this header defines the function: a non-inline
// definition would be emitted by every including translation unit and break
// the link (ODR violation).
inline void CreateNewDir(NYT::IClientBasePtr client, const TString& dirPath) {
    if (client->Exists(dirPath)) {
        client->Remove(dirPath, NYT::TRemoveOptions().Recursive(true));
    }
    client->Create(
        dirPath,
        NYT::ENodeType::NT_MAP,
        NYT::TCreateOptions().IgnoreExisting(true).Recursive(true));
}



// Base class that drives the iterative large-star / small-star procedure.
//
// Subclasses implement the two star steps and the data preparation hooks;
// this class runs the steps in a loop until an iteration reports zero changes
// or the iteration limit is reached.
class IStarOperations {
public:

    // Initializes the Yt member from a bare client and remembers the stream
    // used for progress messages (Cout by default).
    IStarOperations(
            NYT::IClientBasePtr client,
            IOutputStream& logger = Cout
        ) : Yt(client), Logger(logger) {}

    // Same as above, but copies an already configured TYT wrapper.
    IStarOperations(
            TYT& yt,
            IOutputStream& logger = Cout
        ) : Yt(yt), Logger(logger) {}

    // The class is used polymorphically (pure virtual interface below), so the
    // destructor must be virtual: otherwise destroying a derived object
    // through an IStarOperations pointer is undefined behaviour.
    virtual ~IStarOperations() = default;


    // Runs one large-star step; returns the number of changes it reports.
    virtual int RunLargeStar() = 0;
    // Runs one small-star step; returns the number of changes it reports.
    virtual int RunSmallStar() = 0;
    // Prepares input data before the first iteration. `finishPrepare` is the
    // same flag that Run() later uses to decide whether PrepareToFinish() runs.
    virtual void PrepareToStart(bool finishPrepare) = 0;
    // Post-processes the result after the iterations are done.
    virtual void PrepareToFinish() = 0;

    // Runs a large-star step followed by a small-star step and returns the
    // total number of changes reported by both.
    int RunIteration() {
        TMeasure measure(Logger, __func__);

        int LSChangesCount = RunLargeStar();
        int SSChangesCount = RunSmallStar();

        return LSChangesCount + SSChangesCount;
    }

    // Repeats RunIteration() while the previous iteration reported changes and
    // the Iteration counter is below `maxIterationsCount`.
    //
    // Returns true when the last executed iteration reported zero changes, and
    // false when the loop stopped because of the limit (this includes
    // maxIterationsCount == 0, where no iteration runs at all).
    bool RunIterations(ui64 maxIterationsCount) {
        // Starts non-zero so that the first iteration is always attempted.
        ui64 changesCount = 1;
        for (; changesCount > 0 && Iteration < maxIterationsCount; ++Iteration) {
            Logger << "\n----\n" << Iteration << " iteration" << Endl;
            changesCount = RunIteration();
        }
        bool result = (changesCount == 0);
        if (result) {
            Logger << "Connected components are found" << Endl;
        } else {
            Logger << "Iterations limit reached" << Endl;
        }
        return result;
    }

    // Full pipeline: optional preparation, the iteration loop, optional
    // finalization. Returns the result of RunIterations().
    //
    // PrepareToFinish() is called whenever `finishPrepare` is set, regardless
    // of whether the iterations converged.
    bool Run(ui64 maxIterationsCount = 15, bool startPrepare = true, bool finishPrepare = true) {
        TMeasure measure(Logger, __func__);

        if (startPrepare) {
            PrepareToStart(finishPrepare);
        }

        auto result = RunIterations(maxIterationsCount);

        if (finishPrepare) {
            PrepareToFinish();
        }

        return result;
    }

protected:
    // Zero-based index of the next iteration; advanced by RunIterations().
    size_t Iteration = 0;
    // Client wrapper used by subclasses to launch operations.
    TYT Yt;
    // Destination for progress messages; must outlive this object.
    IOutputStream& Logger;
};


// Straightforward implementation of the star operations: every step is a
// single MapReduce over the whole edge table, alternating between two tables
// in `workdir` ("LargeStarResult" and "SmallStarResult").
template <class TDataView, class TIdType>
class TNaiveStars: public IStarOperations {
public:
    using TGraphEdge = typename NConnectedComponents::TMROperations<TIdType>::TGraphEdge;
    using TEdges = typename NConnectedComponents::TMROperations<TIdType>::TEdges;

    // `dataPaths` describes the input/output data and is copied into the
    // object; `workdir` is the directory that holds the two intermediate
    // tables.
    //
    // Members are initialized directly in declaration order. DataTrans is
    // built with the same constructor arguments that were previously used for
    // a temporary which was then copy-assigned into a default-constructed
    // member.
    TNaiveStars(NYT::IClientBasePtr client, const TDataPaths<TDataView>& dataPaths, const TString& workdir, IOutputStream& logger = Cout)
        : IStarOperations(client, logger)
        , LargeStarResultTable(NYT::JoinYPaths(workdir, "LargeStarResult"))
        , SmallStarResultTable(NYT::JoinYPaths(workdir, "SmallStarResult"))
        , DataTrans(Yt, dataPaths.DataView, logger)
        , DataPaths(dataPaths)
    {
    }

    // Extracts edges and vertices from the source data into the small-star
    // table, which is the input of the first large-star step.
    void PrepareToStart(bool /*finishPrepare*/) override {
        DataTrans.ExtractEdgesAndVertices(DataPaths, SmallStarResultTable);
    }

    // Converts the final small-star table into components.
    void PrepareToFinish() override {
        DataTrans.ConvertMRCCEdgesToComponents(DataPaths, {SmallStarResultTable});
    }

    // Large-star step: SmallStarResult -> LargeStarResult.
    // Returns the change count extracted from the finished operation.
    int RunLargeStar() override {
        TMeasure measure(Logger, __func__);
        NYT::TMapReduceOperationSpec spec;
        spec
            .template AddInput<TGraphEdge>(SmallStarResultTable)
            .template AddOutput<TGraphEdge>(LargeStarResultTable)
            .SortBy({NGraphEdgeFields::SOURCE, NGraphEdgeFields::DESTINATION})
            .ReduceBy({NGraphEdgeFields::SOURCE});

        auto op = Yt.Client->MapReduce(
            spec,
            new typename NConnectedComponents::TMROperations<TIdType>::TNaiveLargeStarMapper,
            new typename NConnectedComponents::TMROperations<TIdType>::TNaiveLargeStarReducer,
            Yt.CommonOperationOptions
        );

        return NChangesCountStats::ExtractFromOp(op, Logger);
    }

    // Small-star step: LargeStarResult -> SmallStarResult (no mapper).
    // Returns the change count extracted from the finished operation.
    int RunSmallStar() override {
        TMeasure measure(Logger, __func__);
        NYT::TMapReduceOperationSpec mapReduceSpecSmallStar;
        mapReduceSpecSmallStar
            .template AddInput<TGraphEdge>(LargeStarResultTable)
            .template AddOutput<TGraphEdge>(SmallStarResultTable)
            .SortBy({NGraphEdgeFields::SOURCE, NGraphEdgeFields::DESTINATION})
            .ReduceBy({NGraphEdgeFields::SOURCE});

        // Pass the common operation options like every other operation in
        // this file; previously this call silently used default options.
        auto op = Yt.Client->MapReduce(
            mapReduceSpecSmallStar,
            nullptr,
            new typename NConnectedComponents::TMROperations<TIdType>::TNaiveSmallStarReducer,
            Yt.CommonOperationOptions
        );

        return NChangesCountStats::ExtractFromOp(op, Logger);
    }
private:
    TString LargeStarResultTable;
    TString SmallStarResultTable;
    TDataTransformer<TDataView, TIdType> DataTrans;
    TDataPaths<TDataView> DataPaths;
};


template <class TDataView, class TIdType>
class TOptimizedStars: public IStarOperations {
public:
    using TGraphEdge = typename NConnectedComponents::TMROperations<TIdType>::TGraphEdge;
    using TEdges = typename NConnectedComponents::TMROperations<TIdType>::TEdges;
    struct TPaths {
        struct TIteration {
            TString Workdir{};
            struct {
                TString Table{};
                TString TableTotal{};
                TString File{};
            } StopEdges;

            struct {
                TString Table{};
                TString AdditionTable{};
                TString BVTable{};
            } LargeStarResult;

            struct {
                TString Table{};
                TString BV{};
                TString BVCandidatesForMins{};
            } SmallStarResult;

            TString BVFile{};

            struct {
                TString Table{};
                TString TableTotal{};
                TString File{};
            } BVMins;

            TIteration() {}

            TIteration(const TString& workdir) : Workdir(workdir) {
                StopEdges = {
                    .Table = NYT::JoinYPaths(Workdir, "StopEdges"),
                    .TableTotal = NYT::JoinYPaths(Workdir, "AllStopEdges"),
                    .File = NYT::JoinYPaths(Workdir, "StopEdgesFile"),
                };

                LargeStarResult = {
                    .Table = NYT::JoinYPaths(Workdir, "LargeStarResult"),
                    .AdditionTable = NYT::JoinYPaths(Workdir, "LargeStarResultAddition"),
                    .BVTable = NYT::JoinYPaths(Workdir, "LargeStarBVResult")
                };

                SmallStarResult = {
                    .Table = NYT::JoinYPaths(Workdir, "SmallStarResultTable"),
                    .BV = NYT::JoinYPaths(Workdir, "SmallStarBV"),
                    .BVCandidatesForMins = NYT::JoinYPaths(Workdir, "SmallStarBVCandidatesForMins")
                };

                BVMins = {
                    .Table = NYT::JoinYPaths(Workdir, "BVMins"),
                    .TableTotal = NYT::JoinYPaths(Workdir, "BVMinsAll"),
                    .File = NYT::JoinYPaths(Workdir, "BVFile"),
                };

                BVFile = NYT::JoinYPaths(Workdir, "BVFile");
            }
        };


        NYT::IClientBasePtr Client;
        TString Workdir;

        struct {
            TString Dir{};
            TString PreviousLabels{};
            TString Edges{};
            TString Vertices{};
            TString MRCCEdges{};
            TString BVAll{};
            TString BVMins{};
            TString BVMinsFile{};
            TString BVCandidatesForMins{};
        } Sources;
        struct {
            TString Dir{};
            TString Edges{};
            TString BV{};
            TString Components{};
        } Results;

        TPaths() {}
        TPaths(NYT::IClientBasePtr client, const TString& workdir): Client(client), Workdir(workdir) {
            TString sourceDir = NYT::JoinYPaths(Workdir, "Sources");
            Sources = {
                .Dir = sourceDir,
                .PreviousLabels = NYT::JoinYPaths(sourceDir, "PreviousLabels"),
                .Edges = NYT::JoinYPaths(sourceDir, "Edges"),
                .Vertices = NYT::JoinYPaths(sourceDir, "Vertices"),
                .MRCCEdges = NYT::JoinYPaths(sourceDir, "MRCCEdges"),
                .BVAll = NYT::JoinYPaths(sourceDir, "BVAll"),
                .BVMins = NYT::JoinYPaths(sourceDir, "BVMins"),
                .BVMinsFile = NYT::JoinYPaths(sourceDir, "BVMinsFile"),
                .BVCandidatesForMins = NYT::JoinYPaths(sourceDir, "BVCandidatesForMins")
            };

            TString resultsDir = NYT::JoinYPaths(Workdir, "Results");
            Results = {
                .Dir = resultsDir,
                .Edges = NYT::JoinYPaths(resultsDir, "Edges"),
                .BV = NYT::JoinYPaths(resultsDir, "BV"),
                .Components = NYT::JoinYPaths(resultsDir, "Components")
            };
        }

        TIteration GetIterationPaths(ui64 iteration) {
            TIteration paths(NYT::JoinYPaths(Workdir, ToString(iteration)));
            return paths;
        }
    };

    TOptimizedStars(NYT::IClientBasePtr client,
            const TDataPaths<TDataView>& dataPaths,
            IOutputStream& logger = Cout,
            bool saveAllIterations = false,
            bool withPremarking = true
            ) : IStarOperations(client, logger),
                BasePaths(client, dataPaths.Workdir),
                SaveAllIterations(saveAllIterations),
                WithPreMarking(withPremarking),
                DataPaths(dataPaths),
                DataTrans(Yt, dataPaths.DataView, logger) {
        Paths = BasePaths.GetIterationPaths(Iteration);
    }

    TOptimizedStars(TYT& yt,
            const TDataPaths<TDataView>& dataPaths,
            IOutputStream& logger = Cout,
            bool saveAllIterations = false,
            bool withPremarking = true
            ) : IStarOperations(yt, logger),
                BasePaths(yt.Client, dataPaths.Workdir),
                SaveAllIterations(saveAllIterations),
                WithPreMarking(withPremarking),
                DataPaths(dataPaths),
                DataTrans(Yt, dataPaths.DataView, logger) {
        Paths = BasePaths.GetIterationPaths(Iteration);
    }

    void PrepareFirstIteration() {
        auto& edges = BasePaths.Sources.MRCCEdges;
        CreateNewDir(Yt.Client, Paths.Workdir);

        if (WithPreMarking) {
            NYT::TMapReduceOperationSpec mapReduceSpec;
            mapReduceSpec
                .template AddInput<TGraphEdge>(edges)
                .template AddOutput<TGraphEdge>(BasePaths.Sources.BVMins)
                .SortBy({NGraphEdgeFields::SOURCE, NGraphEdgeFields::DESTINATION})
                .ReduceBy({NGraphEdgeFields::SOURCE});
            Yt.Client->MapReduce(
                mapReduceSpec,
                nullptr,
                new typename NConnectedComponents::TMROperations<TIdType>::TExtractBigKeysCombiner,
                new typename NConnectedComponents::TMROperations<TIdType>::TExtractFirstForBigKeyReducer,
                Yt.GetCommonOperationOptionsWithJoinedSpec(NYT::TNode()("data_size_per_sort_job", 1024 * 1024 * 128))
            );

            WriteEdgesFromTableToFile({BasePaths.Sources.BVMins}, BasePaths.Sources.BVMinsFile);
            NYT::TMapOperationSpec spec;
            spec
                .template AddInput<TGraphEdge>(edges)
                .template AddOutput<TGraphEdge>(edges)
                .template AddOutput<TGraphEdge>(BasePaths.Sources.BVAll)
                .template AddOutput<TGraphEdge>(BasePaths.Sources.BVCandidatesForMins);
            spec.MapperSpec(
                NYT::TUserJobSpec{}
                    .AddFile(BasePaths.Sources.BVMinsFile)
                .MemoryLimit(2ULL << 30ULL /*2GB*/)
            );
            Yt.Client->Map(
                spec,
                new typename NConnectedComponents::TMROperations<TIdType>::TMarkEdgesWithBigSourceMapper(TFsPath(BasePaths.Sources.BVMinsFile).Basename()),
                Yt.CommonOperationOptions
            );

            Yt.Client->Copy(BasePaths.Sources.BVAll, Paths.SmallStarResult.BV);
            Yt.Client->Copy(BasePaths.Sources.BVCandidatesForMins, Paths.SmallStarResult.BVCandidatesForMins);
        }

        Yt.Client->Copy(edges, Paths.SmallStarResult.Table);
    }

    void PrepareToStart(bool finishPrepare) override {
        auto& sources = BasePaths.Sources;
        CreateNewDir(Yt.Client, sources.Dir);
        if (finishPrepare) {
            DataTrans.ExtractEdgesAndVertices(DataPaths, sources.MRCCEdges);
        } else {
            DataTrans.ConvertDataToMRCCEdges(DataPaths.SourceData, sources.MRCCEdges, DataPaths.PreviousLabels);
        }
        PrepareFirstIteration();
    }

    void PrepareToFinish() override {
        DataTrans.ConvertMRCCEdgesToComponents(DataPaths, {Paths.SmallStarResult.Table});
    }

    bool CreateIfNotExist(const TString& path) {
        if (!Yt.Client->Exists(path)) {
            auto writer = Yt.Client->CreateTableWriter<TGraphEdge>(path);
            writer->Finish();
            return false;
        }
        return true;
    }

    int LargeStarProcessGeneralVertices() {
        TMeasure measure(Logger, __func__);

        NYT::TMapReduceOperationSpec spec;
        spec
            .template AddInput<TGraphEdge>(Paths.SmallStarResult.Table)
            .template AddOutput<TGraphEdge>(Paths.LargeStarResult.Table)
            .template AddOutput<TGraphEdge>(Paths.StopEdges.Table)
            .SortBy({NGraphEdgeFields::SOURCE, NGraphEdgeFields::DESTINATION})
            .ReduceBy({NGraphEdgeFields::SOURCE});
        auto op = Yt.Client->MapReduce(
            spec,
            new typename NConnectedComponents::TMROperations<TIdType>::TLargeStarMapper,
            new typename NConnectedComponents::TMROperations<TIdType>::TLargeStarReducer,
            Yt.GetCommonOperationOptionsWithJoinedSpec(
                NYT::TNode()
                ("max_data_weight_per_job", 400 * GB)
                ("reducer", NYT::TNode()
                    ("max_data_weight_per_job", 400 * GB))
            )
        );

        return NChangesCountStats::ExtractFromOp(op, Logger) + LargeStarPostProcess();;
    }

    int LargeStarPostProcess() {
        TMeasure measure(Logger, __func__);

        if (!GetRowCount(Yt.Client, Paths.StopEdges.Table)) {
            auto writer = Yt.Client->CreateTableWriter<TGraphEdge>(Paths.LargeStarResult.AdditionTable);
            writer->Finish();
            return 0;
        }
        WriteEdgesFromTableToFile({Paths.StopEdges.Table}, Paths.StopEdges.File);
        NYT::TMapOperationSpec spec;
        spec
            .template AddInput<TGraphEdge>(Paths.SmallStarResult.Table)
            .template AddOutput<TGraphEdge>(Paths.LargeStarResult.AdditionTable);
        spec.MapperSpec(
            NYT::TUserJobSpec{}
                .AddFile(Paths.StopEdges.File)
            .MemoryLimit(3ULL << 30ULL /*3GB*/)
        );
        auto op = Yt.Client->Map(
            spec,
            new typename NConnectedComponents::TMROperations<TIdType>::TLSPostProcessBigVerticesMapper(TFsPath(Paths.StopEdges.File).Basename()),
            Yt.CommonOperationOptions
        );

        return NChangesCountStats::ExtractFromOp(op, Logger);
    }

    void ComputeMinsForBigVertices() {
        TMeasure measure(Logger, __func__);
        NYT::TMapReduceOperationSpec mapReduceSpecForMins;
        CreateIfNotExist(Paths.SmallStarResult.BVCandidatesForMins);
        mapReduceSpecForMins
            .template AddInput<TGraphEdge>(Paths.SmallStarResult.BVCandidatesForMins)
            .template AddOutput<TGraphEdge>(Paths.BVMins.Table)
            .SortBy({NGraphEdgeFields::SOURCE, NGraphEdgeFields::DESTINATION})
            .ReduceBy({NGraphEdgeFields::SOURCE});

        auto op = Yt.Client->MapReduce(
            mapReduceSpecForMins,
            nullptr,
            new typename NConnectedComponents::TMROperations<TIdType>::TExtractFirstRecordReducer,
            new typename NConnectedComponents::TMROperations<TIdType>::TExtractFirstRecordReducer,
            Yt.CommonOperationOptions
        );
    }

    int LargeStarProcessBigVertices() {
        TMeasure measure(Logger, __func__);
        if (!GetRowCount(Yt.Client, Paths.SmallStarResult.BV)) {
            auto writer = Yt.Client->CreateTableWriter<TGraphEdge>(Paths.LargeStarResult.BVTable);
            writer->Finish();
            return 0;
        }
        ComputeMinsForBigVertices();
        WriteEdgesFromTableToFile({Paths.BVMins.Table}, Paths.BVMins.File);
        NYT::TMapOperationSpec spec;
        spec
            .template AddInput<TGraphEdge>(Paths.SmallStarResult.BV)
            .template AddOutput<TGraphEdge>(Paths.LargeStarResult.BVTable);
        spec.MapperSpec(
            NYT::TUserJobSpec{}
                .AddFile(Paths.BVMins.File)
            .MemoryLimit(2ULL << 30ULL /*2GB*/)
        );
        auto op = Yt.Client->Map(
            spec,
            new typename NConnectedComponents::TMROperations<TIdType>::TLSProcessBigVerticesMapper(TFsPath(Paths.BVMins.File).Basename()),
            Yt.CommonOperationOptions
        );

        return NChangesCountStats::ExtractFromOp(op, Logger);
    }

    void WriteToFile(const TString& file, const TEdges& protoEdges) {
        auto writer = Yt.Client->CreateFileWriter(file);
        TString stringProto;
        Y_PROTOBUF_SUPPRESS_NODISCARD protoEdges.SerializeToString(&stringProto);
        *writer << stringProto;
        writer->Finish();
    }

    void WriteEdgesFromTableToFile(const TVector<TString>& tables, const TString& file) {
        TEdges protoEdges;
        auto edges = protoEdges.MutableEdges();
        for (const auto& table: tables) {
            if (Yt.Client->Exists(table)) {
                auto reader = Yt.Client->CreateTableReader<TGraphEdge>(table);
                for (; reader->IsValid(); reader->Next()) {
                    auto& row = reader->GetRow();
                    auto edge = edges->Add();
                    *edge = row;
                }
            }
        }
        WriteToFile(file, protoEdges);
    }


    int RunLargeStar() override {
        TMeasure measure(Logger, __func__);
        if (SaveAllIterations) {
            CreateNewWorkdirForNextIteration();
        }

        auto changesCountBig = LargeStarProcessGeneralVertices();
        auto changesCountSmall = LargeStarProcessBigVertices();
        return changesCountBig + changesCountSmall;
    }

    TPaths GetPaths() {
        return Paths;
    }

    void MergeTo(const TString& source, const TString& destination) {
        if (!Yt.Client->Exists(source)) {
            return;
        }
        NYT::TMergeOperationSpec spec;
        spec.AddInput(NYT::TRichYPath(source));

        spec.Output(NYT::TRichYPath(destination).Append(true));
        Yt.Client->Merge(spec, Yt.CommonOperationOptions);
    }

    int RunSmallStar() override {
        TMeasure measure(Logger, __func__);
        MergeTo(Paths.StopEdges.Table, Paths.StopEdges.TableTotal);
        MergeTo(Paths.BVMins.Table, Paths.BVMins.TableTotal);
        WriteEdgesFromTableToFile({Paths.StopEdges.TableTotal, Paths.BVMins.TableTotal}, Paths.BVFile);
        NYT::TMapReduceOperationSpec spec;
        spec
            .template AddInput<TGraphEdge>(Paths.LargeStarResult.Table)
            .template AddInput<TGraphEdge>(Paths.LargeStarResult.AdditionTable)
            .template AddInput<TGraphEdge>(Paths.LargeStarResult.BVTable)
            .template AddOutput<TGraphEdge>(Paths.SmallStarResult.Table)
            .template AddOutput<TGraphEdge>(Paths.SmallStarResult.BV)
            .template AddOutput<TGraphEdge>(Paths.SmallStarResult.BVCandidatesForMins)
            .SortBy({NGraphEdgeFields::SOURCE, NGraphEdgeFields::DESTINATION})
            .ReduceBy({NGraphEdgeFields::SOURCE});
        spec.ReducerSpec(
            NYT::TUserJobSpec{}
                .AddFile(Paths.BVFile)
                .MemoryLimit(2ULL << 30ULL /*2GB*/)
        );

        auto op = Yt.Client->MapReduce(
            spec,
            nullptr,
            new typename NConnectedComponents::TMROperations<TIdType>::TSmallStarReducer(TFsPath(Paths.BVFile).Basename()),
            Yt.CommonOperationOptions
        );
        return NChangesCountStats::ExtractFromOp(op, Logger);
    }

    void CreateNewWorkdirForNextIteration() {
        auto newPaths = BasePaths.GetIterationPaths(Iteration+1);
        if (Yt.Client->Exists(newPaths.Workdir)) {
            Yt.Client->Remove(newPaths.Workdir, NYT::TRemoveOptions().Recursive(true));
        }
        Yt.Client->Copy(Paths.Workdir, newPaths.Workdir, NYT::TCopyOptions().Recursive(true).Force(true));
        Paths = newPaths;
    }

private:
    TPaths BasePaths;
    typename TPaths::TIteration Paths;
    bool SaveAllIterations = false;
    bool WithPreMarking = true;
    TDataPaths<TDataView> DataPaths;
    TDataTransformer<TDataView, TIdType> DataTrans;
};

} // namespace NConnectedComponents
