#include "yt_indexer.h"

#include "yt_document_mapper.h"
#include "yt_indexing_mapper.h"
#include "yt_indexing_reducer.h"
#include "yt_merging_reducer.h"
#include "yt_publisher.h"
#include "yt_renaming_mapper.h"
#include "config_files.h"

#include <saas/rtyserver/config/common_indexers_config.h>
#include <saas/rtyserver/config/config.h>
#include <saas/rtyserver/config/indexer_config.h>
#include <saas/rtyserver/config/merger_config.h>
#include <saas/rtyserver/config/realm_config.h>
#include <saas/rtyserver/factors/factors_config.h>
#include <saas/rtyserver/pruning_config/pruning_config.h>
#include <saas/api/mr_client/processors/processors.h>

#include <mapreduce/yt/interface/client.h>

#include <library/cpp/yson/node/node_io.h>

#include <util/generic/size_literals.h>
#include <util/generic/yexception.h>
#include <util/stream/file.h>

namespace {

    class TSortDocumentsCommand : public TYTCommand {
    public:
        TSortDocumentsCommand(TInputs inputs, TOutputs outputs, bool verbose, NSaas::TYTLaunchReport& report, const NYT::TNode& acl)
            : TYTCommand(std::move(inputs), std::move(outputs), "TSortDocumentsCommand", verbose, report, acl)
        {
        }
        virtual ~TSortDocumentsCommand() = default;

    protected:
        void DoPrepareSpec() override {
            auto keys = ShardIdKeyColumns;
            keys.Add("segment_id");
            Spec.AddInput(Inputs.at(0)).Output(Outputs.at(0)).SortBy(keys);
        }

        void DoRun(NYT::IClientBase* client) override {
            NYT::TYPath deltaAttribute = Inputs.at(0).Path_ + "/@is_delta";
            bool isDelta = (client->Exists(deltaAttribute) && client->Get(deltaAttribute).AsBool());
            client->Create(Outputs.at(0).Path_, NYT::ENodeType::NT_TABLE,
                            NYT::TCreateOptions().Recursive(true).Force(true)
                            .Attributes(NYT::TNode()("is_delta", isDelta)));
            Sort(client, Spec);
        }

    private:
        NYT::TSortOperationSpec Spec;
    };
    class TShardResourceSortCommand : public TYTCommand {
    public:
        TShardResourceSortCommand(TInputs inputs, TOutputs outputs, bool verbose, NSaas::TYTLaunchReport& report, const NYT::TNode& acl)
            : TYTCommand(std::move(inputs), std::move(outputs), "TShardResourceSortCommand", verbose, report, acl)
        {
        }
        virtual ~TShardResourceSortCommand() = default;

    protected:
        void DoPrepareSpec() override {
            Keys = NYT::TSortColumns();
            Keys.Add("shard_min", "shard_max", "timestamp", "timestamp_ext");
            Outputs.at(0).SortedBy(Keys);
            for (const auto& input : Inputs) {
                Spec.AddInput(input);
            }
            Spec.Output(Outputs.at(0))
                .SortBy(Keys);
        }

        void DoRun(NYT::IClientBase* client) override {
            client->Create(Outputs.at(0).Path_, NYT::NT_TABLE, NYT::TCreateOptions().Recursive(true).Force(true));
            Sort(client, Spec);
            PrintVerbose(TString("Index table: ") + Outputs.at(0).Path_.Quote());
        }

    private:
        NYT::TSortColumns Keys;
        NYT::TSortOperationSpec Spec;
    };

    class TFinalSortCommand : public TYTCommand {
    public:
        TFinalSortCommand(TInputs inputs, TOutputs outputs, bool verbose, NSaas::TYTLaunchReport& report, EYTBlobFormat blobFormat, const NYT::TNode& acl)
            : TYTCommand(std::move(inputs), std::move(outputs), "TFinalSortCommand", verbose, report, acl)
            , BlobFormat(blobFormat)
        {
        }
        virtual ~TFinalSortCommand() = default;

        static NYT::TSortColumns GetSortColumns(EYTBlobFormat blobFormat) {
            if (blobFormat == EYTBlobFormat::Old) {
                return NYT::TSortColumns(ShardIdKeyColumns).Add("segment_id", "name", "part_index");
            } else {
                return NYT::TSortColumns(ShardIdKeyColumns).Add("segment_id", "filename", "part_index");
            }
        }

    protected:
        void DoPrepareSpec() override {
            Keys = GetSortColumns(BlobFormat);
            Outputs.at(0).SortedBy(Keys);
            for (const auto& input : Inputs) {
                Spec.AddInput(input);
            }
            Spec.Output(Outputs.at(0))
                .SortBy(Keys);
            // TODO: use GenerateTableSchema() from robot/library/yt/static/table.h
            /*
            NYT::TTableSchema finalSchema;
            finalSchema.AddColumn(NYT::TColumnSchema().Name(ShardIdKeyColumns).Type(NYT::VT_UINT64))
                       .AddColumn(NYT::TColumnSchema().Name("name").Type(NYT::VT_STRING))
                       .AddColumn(NYT::TColumnSchema().Name("part_index").Type(NYT::VT_UINT64))
                       .AddColumn(NYT::TColumnSchema().Name("data_length").Type(NYT::VT_UINT64))
                       .AddColumn(NYT::TColumnSchema().Name("data").Type(NYT::VT_STRING));
            */
        }

        void DoRun(NYT::IClientBase* client) override {
            NYT::TYPath deltaAttribute = Inputs.at(0).Path_ + "/@is_delta";
            bool isDelta = (client->Exists(deltaAttribute) && client->Get(deltaAttribute).AsBool());
            client->Create(Outputs.at(0).Path_, NYT::ENodeType::NT_TABLE,
                            NYT::TCreateOptions().Recursive(true).Force(true)
                            .Attributes(NYT::TNode()("is_delta", isDelta)));
            Sort(client, Spec);
            PrintVerbose(TString("Index table: ") + Outputs.at(0).Path_.Quote());
        }

    private:
        NYT::TSortColumns Keys;
        NYT::TSortOperationSpec Spec;
        EYTBlobFormat BlobFormat;
    };

    class TMoveCommand: public TYTCommand {
    public:
        TMoveCommand(TInputs inputs, TOutputs outputs, bool verbose, NSaas::TYTLaunchReport& report)
            : TYTCommand(std::move(inputs), std::move(outputs), "TMoveCommand", verbose, report, {})
        {
        }

    protected:
        void DoPrepareSpec() override {
        }

        void DoRun(NYT::IClientBase* client) override {
            const auto& src = Inputs.at(0).Path_;
            const auto& dst = Outputs.at(0).Path_;
            client->Move(src, dst, NYT::TMoveOptions().Recursive(true).Force(true));
            PrintVerbose(TStringBuilder() << "Move table: " << src << " to " << dst);
        }
    };
}

// Builds the options from the process command line.
// The raw arguments are remembered space-joined in CommandLine; the options are then
// registered via BindToOpts(), parsed, and finalized by PostProcess() using the free arguments.
TYTIndexer::TOptions::TOptions(int argc, const char* argv[])
    : CommandLine(JoinStrings(argv, argv + argc, " "))
{
    NLastGetopt::TOpts optionSet;
    optionSet.AddHelpOption('?');
    optionSet.AddVersionOption('v');
    BindToOpts(optionSet);

    NLastGetopt::TOptsParseResultException parsed(&optionSet, argc, argv);
    PostProcess(parsed.GetFreeArgs());
}

// Registers every command-line option of the indexer in `opts` and binds each one to
// the corresponding member of this options object. Defaults shown in the help are taken
// from the current member values, so members must hold their defaults before this call.
void TYTIndexer::TOptions::BindToOpts(NLastGetopt::TOpts& opts) {
    // Options of the embedded daemon configuration machinery.
    DaemonOptions.BindToOpts(opts);

    // --- Required options: cluster, sources, destination dir, service, searchmap ---
    opts.AddLongOption("proxy", "YT cluster to run the indexer")
        .RequiredArgument("PROXY").StoreResult(&Proxy)
        .Required();
    opts.AddLongOption("src", "path to YT table with documents for indexing (schema depends on PROCESSOR)\n"
                              "can be supplited multiple times for more than one input table")
        .RequiredArgument("SRC_TABLE").SplitHandler(&Src, '\0')
        .Required();
    opts.AddLongOption("dst-dir", "path to YT dir for indexing results")
        .RequiredArgument("DST_DIR").StoreResult(&DstDir)
        .Required();
    opts.AddLongOption("service", "name of the SaaS service")
        .RequiredArgument("SERVICE").StoreResult(&ServiceName)
        .Required();
    opts.AddLongOption("searchmap", "path to the searchmap.json file with sharding rules for SERVICE")
        .RequiredArgument("SEARCHMAP").StoreResult(&SearchMapPath)
        .Required();

    // --- Optional path overrides; the defaults are derived later in PostProcess() ---
    opts.AddLongOption("dst", "path to YT table to store index (default: \"DST_DIR/data/SERVICE/TIMESTAMP.blob\")")
        .RequiredArgument("DST_TABLE").StoreResult(&Dst);
    opts.AddLongOption("tmp-dir", "path to YT dir for temp tables (default: \"DST_DIR/tmp/SERVICE_TIMESTAMP/\")")
        .RequiredArgument("TMP_DIR").StoreResult(&TmpDir);

    // --- Publishing ---
    opts.AddLongOption("publish", "publish index in the snapshot manager")
        .NoArgument().StoreResult(&Publish, true)
        .DefaultValue(Publish ? "true" : "false");
    opts.AddLongOption("publish-manager", "snapshot manager type: yt, zookeeper")
        .RequiredArgument("PUB_MGR").StoreResult(&SnapshotManager)
        .DefaultValue(SnapshotManager);
    opts.AddLongOption("publish-server", "snapshot server: YT proxy or ZK hosts (default: \"PROXY\" when PUB_MGR=\"yt\")")
        .RequiredArgument("PUB_SRV").StoreResult(&SnapshotManagerContext.Server);
    opts.AddLongOption("publish-path", "ypath or znode to publish results (default: \"DST_DIR/testing/SERVICE\" when PUB_MGR=\"yt\")")
        .RequiredArgument("PUB_PATH").StoreResult(&SnapshotManagerContext.Path);
    opts.AddLongOption("sky-share", "share index via sky and publish rbtorrent strings")
        .NoArgument().StoreResult(&EnableSkyShare, true)
        .DefaultValue(EnableSkyShare ? "true" : "false");

    // --- Blob layout ---
    // NOTE(review): the second StoreResult argument (`true`) looks copy-pasted from the
    // flag options; this option always carries an argument, so the fallback is presumably
    // never used - confirm against NLastGetopt semantics before removing it.
    opts.AddLongOption("chunk-size", "size of chunk (in bytes), which will be used in index dumpers")
        .RequiredArgument("CHUNK_SIZE").StoreResult(&DumperChunkSize, true)
        .DefaultValue(DumperChunkSize);
    opts.AddLongOption("blob-format", "select blob format: new format required for sky share")
        .RequiredArgument("FORMAT").StoreResult(&BlobFormat)
        .DefaultValue(EYTBlobFormat::Old);
    opts.AddLongOption("add-extra-timestamp", "add extra timestamps for docfetcher ytpull stream on the backend")
        .NoArgument().StoreResult(&AddExtraTimestamp, true)
        .DefaultValue(AddExtraTimestamp ? "true" : "false");

    // --- Document processing ---
    opts.AddLongOption("cook-format", "cook and publish whole index or in delta format")
        .RequiredArgument("full or delta").StoreResult(&CookFormat)
        .DefaultValue(ECookFormat::Full);
    opts.AddLongOption("timestamp", "modification timestamp for documents (default: now)")
        .RequiredArgument("TIMESTAMP").StoreResult(&ProcessorOptions.Timestamp);
    opts.AddLongOption("processor", "processor to convert rows of SRC_TABLE into SaaS TActions")
        .RequiredArgument("PROCESSOR").StoreResult(&Processor)
        .DefaultValue(Processor);

    // --- Resources of indexer jobs ---
    opts.AddLongOption("indexer-ram-gb", "Size of RAM for each indexer job in gigabytes")
        .RequiredArgument("RAM_GB").StoreResult(&IndexerRamGb)
        .DefaultValue(IndexerRamGb);
    opts.AddLongOption("indexer-ram-reserve-factor", "Memory reserve factor for each indexer job")
        .RequiredArgument("FLOAT").StoreResult(&IndexerRamReserveFactor)
        .DefaultValue(IndexerRamReserveFactor);
    opts.AddLongOption("indexer-tmpfs-gb", "Size of TmpFS for each indexer job in gigabytes")
        .RequiredArgument("TMPFS_GB").StoreResult(&IndexerTmpfsGb)
        .DefaultValue(IndexerTmpfsGb);
    opts.AddLongOption("indexer-cpu", "How much cpu to use for each of the indexer jobs")
        .RequiredArgument("CPU").StoreResult(&IndexerCpu)
        .DefaultValue(IndexerCpu);

    // --- Resources of mapper (processor) jobs ---
    opts.AddLongOption("mapper-ram-gb", "Size of RAM for each mapper (processor) job in gigabytes (default: get from yt)")
        .RequiredArgument("RAM_MAPPER_GB").StoreResult(&IndexerRamMapperGb);
    opts.AddLongOption("mapper-max-row-weight-mb", "Max row weight for document mapper output in megabytes (default: get from yt)")
        .RequiredArgument("MAX_ROW_MB").StoreResult(&MaxRowWeightMb);

    // --- Segmentation; unset values are filled from the parsed config in PostProcess() ---
    opts.AddLongOption("segment-size", "max documents per segment (default: CONFIG.MergerConfig.MaxDocumentsToMerge)")
        .RequiredArgument("SEGMENT").StoreResult(&MaxDocumentsPerSegment);

    opts.AddLongOption("min-segments-count", "min segment count (default: CONFIG.MergerConfig.MaxSegments)")
        .RequiredArgument("MIN_SEGMENT_CNT").StoreResult(&MinSegmentsCount);

    // --- Config handling ---
    // Unlike the other boolean switches below, this one takes an explicit argument.
    opts.AddLongOption("cut-includes", "cut off external files from config")
        .RequiredArgument("CUT_INCLUDES").StoreResult(&CutConfigIncludes)
        .DefaultValue(CutConfigIncludes ? "true" : "false");
    opts.AddLongOption("dict-path", "path to dict.dict")
        .RequiredArgument("DICT_PATH").StoreResult(&DictPath)
        .DefaultValue(DictPath);

    // --- Run-mode flags (no argument; presence sets the member to true) ---
    opts.AddLongOption("split-segments", "split and index segments in smaller portions, then merge results back")
        .NoArgument().StoreResult(&SplitSegments, true)
        .DefaultValue(SplitSegments ? "true" : "false");
    opts.AddLongOption("keep-temps", "don't remove TMP_DIR on successful completion")
        .NoArgument().StoreResult(&KeepTemps, true)
        .DefaultValue(KeepTemps ? "true" : "false");
    opts.AddLongOption("sample-shards", "Prepare table with the sample URLs and corresponding shard ids")
        .NoArgument().StoreResult(&SampleShards, true)
        .DefaultValue(SampleShards ? "true" : "false");
    opts.AddLongOption("sample-urls-num", "Number of sample urls per shard")
        .RequiredArgument("URLS_NUM").StoreResult(&SampleUrlsNum)
        .DefaultValue(SampleUrlsNum);
    opts.AddLongOption("force", "overwrite DST_TABLE if it already exists")
        .NoArgument().StoreResult(&Force, true)
        .DefaultValue(Force ? "true" : "false");
    opts.AddLongOption("resume", "pick up intermediate results from TMP_DIR, use with --timestamp option")
        .NoArgument().StoreResult(&Resume, true)
        .DefaultValue(Resume ? "true" : "false");
    opts.AddLongOption("verbose", "print progress to stderr")
        .NoArgument().StoreResult(&Verbose, true)
        .DefaultValue(Verbose ? "true" : "false");
    opts.AddLongOption("dry-run", "only validate YT specs, don't run any operations")
        .NoArgument().StoreResult(&DryRun, true)
        .DefaultValue(DryRun ? "true" : "false");
    opts.AddLongOption("skip-indexing", "skip indexing and timestamp checks")
        .NoArgument().StoreResult(&SkipIndexing, true)
        .DefaultValue(SkipIndexing ? "true" : "false");
    opts.AddLongOption("forbid-same-urls", "guarantee that source table has unique urls")
        .NoArgument().StoreResult(&ForbidSameUrls, true)
        .DefaultValue(ForbidSameUrls ? "true" : "false");

    // --- Error handling and document filtering ---
    opts.AddLongOption("save-deleted-documents", "Index messages with type atDelete as deleted documents (if not set, these documents will be skipped)")
        .NoArgument().StoreResult(&SaveDeletedDocuments, true)
        .DefaultValue(SaveDeletedDocuments ? "true" : "false");
    opts.AddLongOption("fail-on-index-error", "do not ignore indexing errors")
        .NoArgument().StoreResult(&FailOnIndexError, true)
        .DefaultValue(FailOnIndexError ? "true" : "false");
    opts.AddLongOption("filter-incorrect-documents", "save incorrect documents in separate table")
        .NoArgument().StoreResult(&FilterIncorrectDocs, true)
        .DefaultValue(FilterIncorrectDocs ? "true" : "false");

    // Repeatable option: every occurrence is appended to AttachPath.
    opts.AddLongOption("attach-path", "attach path to YT job, for example: --attach-path /path/to/my/file --attach-path /path/to/my/file:./dst/my_file")
        .AppendTo(&AttachPath);

    opts.AddLongOption("use-dataless-parts-on-merge", "use DATALESS archive parts in the merging reducer (enabled by --split-segments option). The mode is BROKEN: https://st.yandex-team.ru/SAAS-5603")
        .NoArgument().StoreResult(&UseDatalessPartsOnMerge, true)
        .DefaultValue(UseDatalessPartsOnMerge ? "true" : "false");

    // --- Extra daemon modules for indexer / merger jobs ---
    opts.AddLongOption("run-indexer-modules", "comma-separated list of daemon modules to run inside indexer jobs")
        .RequiredArgument("MODULES").StoreResult(&IndexerModulesToRun)
        .DefaultValue("");
    opts.AddLongOption("run-merger-modules", "comma-separated list of daemon modules to run inside merger jobs")
        .RequiredArgument("MODULES").StoreResult(&MergerModulesToRun)
        .DefaultValue("");

    // The ACL is given as a YSON string and parsed into a node right away.
    opts.AddLongOption("acl", "Operations ACL in YSON format")
        .RequiredArgument("ACL").Handler1T<TString>([this](TString arg) {
            Acl = NYT::NodeFromYsonString(arg);
        });

    opts.AddLongOption("report-path", "Path to file where extra launch information will be written")
        .RequiredArgument("PATH").StoreResult(&ReportFilePath)
        .DefaultValue("");
}

// Finalizes the parsed options:
//  * validates option combinations and fills derived defaults (timestamp, destination,
//    resource and temp paths, publish server/path);
//  * configures the daemon config preprocessor and renders the indexing and merging
//    config texts;
//  * reads the searchmap file and, when needed, parses the config to obtain segment
//    limits and to collect config files.
//
// freeArgs: free command-line arguments; the first one is the path to the config file.
// Throws yexception on inconsistent options or when the config path is missing.
void TYTIndexer::TOptions::PostProcess(TVector<TString> freeArgs) {
    // Max<ui64>() in ProcessorOptions.Timestamp is treated as "timestamp was not given".
    if (ProcessorOptions.Timestamp == Max<ui64>()) {
        if (Resume) {
            ythrow yexception() << "Option '--resume' cannot be used without '--timestamp'";
        }
        if (AddExtraTimestamp) {
            ythrow yexception() << "Option '--add-extra-timestamp' cannot be used without '--timestamp'";
        }
        ProcessorOptions.Timestamp = TInstant::Now().Seconds();
    }

    if (FilterIncorrectDocs) {
        // In this mode the destination is a directory holding the index, the shard
        // resources and the dropped documents side by side.
        TString artifactsPath;
        if (!Dst) {
            artifactsPath = DstDir + "/data/" + ServiceName + "/" + ::ToString(ProcessorOptions.Timestamp);
        } else {
            artifactsPath = Dst;
        }
        Dst = artifactsPath + "/index.blob";
        ResourceDst = artifactsPath + "/shard_resources";
        IncorrectDocsDst = artifactsPath + "/dropped_docs";
    } else {
        if (!Dst) {
            Dst = DstDir + "/data/" + ServiceName + "/" + ::ToString(ProcessorOptions.Timestamp) + ".blob";
        }
        ResourceDst = Dst + ".resources";
    }

    if (!TmpDir) {
        TmpDir = DstDir + "/tmp/" + ServiceName + "_" + ::ToString(ProcessorOptions.Timestamp);
    }
    if (Publish) {
        if (SnapshotManager == "yt") {
            // For the YT snapshot manager both server and path have defaults.
            if (!SnapshotManagerContext.Server) {
                SnapshotManagerContext.Server = Proxy;
            }
            if (!SnapshotManagerContext.Path) {
                SnapshotManagerContext.Path = DstDir + "/testing/" + ServiceName;
            }
        } else {
            if (!SnapshotManagerContext.Server) {
                ythrow yexception() << "option '--publish-server' is missing";
            }
            if (!SnapshotManagerContext.Path) {
                ythrow yexception() << "option '--publish-path' is missing";
            }
        }
    }

    if (EnableSkyShare) {
        if (BlobFormat == EYTBlobFormat::Old) {
            // The option is registered as '--sky-share'.
            ythrow yexception() << "Option '--sky-share' may be used only with new blob format";
        }
        if (DumperChunkSize != 4_MB) {
            // 4_MB == 4 * 1024 * 1024 == 4194304 bytes.
            ythrow yexception() << "For successful sky sharing we have to use '--chunk-size 4194304' (4MiB)";
        }
    }

    // front() on an empty vector is undefined behaviour, so fail with a clear message instead.
    if (freeArgs.empty()) {
        ythrow yexception() << "path to the config file is missing (expected as the first free argument)";
    }
    TString configPath = freeArgs.front();
    DaemonOptions.SetConfigFileName(configPath);

    TDaemonConfigPatcher& preprocessor = DaemonOptions.GetPreprocessor();
    if (CutConfigIncludes) {
        preprocessor.AddPatch("Server.Searcher.FactorsInfo", "");
        preprocessor.AddPatch("Server.Indexer.Common.RecognizeLibraryFile", "NOTSET");
    }

    preprocessor.AddPatch("Server.Indexer.Common.SaveDeletedDocuments", ToString(SaveDeletedDocuments));
    preprocessor.AddPatch("Server.IsReadOnly", "false");
    preprocessor.AddPatch("Server.VerificationPolicy", "IndexingUniqueUrls");

    preprocessor.AddPatch("Server.Searcher.AccessLog", "");
    preprocessor.AddPatch("Server.Indexer.Common.IndexLog", "");
    preprocessor.AddPatch("Server.IndexDir", "./index");
    preprocessor.AddPatch("Server.Indexer.Memory.Enabled", "false");
    preprocessor.AddPatch("Server.Indexer.Memory.AllowSameUrls", ToString(!ForbidSameUrls));
    preprocessor.AddPatch("Server.Indexer.Disk.AllowSameUrls", ToString(!ForbidSameUrls));
    preprocessor.AddPatch("Server.ComponentsConfig.DDK.EnableLifetimeCheck", "false");
    preprocessor.AddPatch("Server.AdditionalModules", IndexerModulesToRun);

    preprocessor.SetVariable("INDEX_DIRECTORY", "./index");
    preprocessor.SetVariable("LOG_PATH", ".");
    preprocessor.SetVariable("LOG_LEVEL", "6");
    preprocessor.SetVariable("WorkDir", ".");
    preprocessor.SetVariable("BasePort", "0");
    preprocessor.SetVariable("IS_STANDALONE_INDEXER", "1");
    preprocessor.SetUndefinedVariable("CTYPE", "standalone_indexer");
    preprocessor.SetUndefinedVariable("SERVICE_TYPE", "standalone_indexer");
    preprocessor.SetVariable("LOCATION", "UNKNOWN");
    preprocessor.SetVariable("DM_HOST", "NOTSET");
    preprocessor.SetVariable("DM_PORT", "0");
    preprocessor.SetVariable("SLOT", "localhost:0");
    preprocessor.SetVariable("SERVICE", ServiceName);
    preprocessor.SetVariable("SHARD_MIN", "0");
    preprocessor.SetVariable("SHARD_MAX", "65533");

    // The indexing config is rendered before the merger-only patches below are added.
    IndexingConfigBundle.Text = DaemonOptions.RunPreprocessor();

    if (UseDatalessPartsOnMerge) {
        preprocessor.AddPatch("Server.ComponentsConfig.FULLARC.Layers.full.Compression", "DATALESS");
        preprocessor.AddPatch("Server.ComponentsConfig.FULLARC.Layers.base.Compression", "DATALESS");
        preprocessor.AddPatch("Server.ComponentsConfig.FULLARC.DisablePartsOptimization", "true");
    }
    preprocessor.AddPatch("Server.AdditionalModules", MergerModulesToRun);

    MergingConfigBundle.Text = DaemonOptions.RunPreprocessor();

    // A third rendering, with DictDir set, is used only for local parsing below.
    preprocessor.SetVariable("DictDir", TFsPath(DictPath).Parent());
    TString configTextForParse = DaemonOptions.RunPreprocessor();

    SearchMapText = TUnbufferedFileInput(SearchMapPath).ReadAll();

    // The config is parsed only when something has to be taken from it.
    if (!MaxDocumentsPerSegment || !CutConfigIncludes || !MinSegmentsCount) {
        InitGlobalLog2Console(TLOG_CRIT); // suppress logging for config parsing
        TServerConfigConstructorParams params(configTextForParse.data(), "", &DaemonOptions.GetPreprocessor());
        TRTYServerConfig config(params);
        InitGlobalLog2Console();
        if (!MaxDocumentsPerSegment) {
            if (CookFormat == ECookFormat::Delta) {
                MaxDocumentsPerSegment = config.GetRealmListConfig().GetMainRealmConfig().GetIndexerConfigDisk().MaxDocuments;
            } else {
                MaxDocumentsPerSegment = config.GetMergerConfig().MaxDocumentsToMerge;
            }
        }
        if (!MinSegmentsCount && CookFormat != ECookFormat::Delta) {
            MinSegmentsCount = config.GetMergerConfig().MaxSegments;
        }
        if (!CutConfigIncludes) {
            bool hasDict = config.GetCommonIndexers().RecognizeLibraryFile != "NOTSET"
                && config.GetCommonIndexers().RecognizeLibraryFile != "";
            ConfigFiles.Init(configPath, hasDict ? DictPath : "");
            for (auto& path : AttachPath) {
                ConfigFiles.AttachPath(path);
            }

            IndexingConfigBundle.Variables = DaemonOptions.GetPreprocessor().GetVariables();
            MergingConfigBundle.Variables = DaemonOptions.GetPreprocessor().GetVariables();
        }
    }

    // Processors that need config files receive the indexing config text and variables.
    if (NSaas::IRowProcessor::TFactory::Construct(Processor, ProcessorOptions)->NeedConfigFiles()) {
        ProcessorOptions.OtherOptions = IndexingConfigBundle.Text;
        ProcessorOptions.ExtraOptions = IndexingConfigBundle.Variables;
    }
}

// Parses the command line, creates a YT client for the requested proxy and prepares the
// whole command chain. No commands are executed here; see Run().
TYTIndexer::TYTIndexer(int argc, const char* argv[])
    : Options(argc, argv)
    , ClientPtr(NYT::CreateClient(Options.Proxy))
    , SrcDocs(Options.Src.cbegin(), Options.Src.cend())
    , DstIndex(Options.Dst)
{
    Report.AddTimestamp(Options.ProcessorOptions.Timestamp);
    // Order matters: the temp tables and the command list both depend on NeedToIndex.
    InitNeedToIndex();
    InitTmpTables();
    InitCommands();
}

void TYTIndexer::InitNeedToIndex() {
    // We run indexing phase by default. It can be skipped to re-publish existing up-to-date index table
    NeedToIndex = true;

    if (Options.SkipIndexing) {
        NeedToIndex = false;
        PrintVerbose("--skip-indexing is set. Set NeedToIndex = false");
        return;
    }

    auto missing = std::find_if(SrcDocs.cbegin(), SrcDocs.cend(), [&](auto path){ return !ClientPtr->Exists(path.Path_); });
    if (missing != SrcDocs.cend()) {
        ythrow yexception() << "SRC_TABLE " << missing->Path_.Quote() << " doesn't exist" << Endl;
    }

    if (ClientPtr->Exists(DstIndex.Path_)) {
        if (Options.Force) {
            PrintVerbose(TString("DST_TABLE ") + DstIndex.Path_.Quote() + " already exists, using '--force' to overwrite it");
            return ;
        }
        PrintVerbose("DST_TABLE alredy exists, checking the modification time");
        auto getMTime = [&](auto& path) { return ClientPtr->Get(path.Path_ + "/@modification_time").AsString(); };
        TString dstMTime = getMTime(DstIndex);
        auto newerThanDst = [&](auto& src) -> bool { return (getMTime(src) > dstMTime); }; //ISO8601 compared as strings
        auto newer = std::find_if(SrcDocs.cbegin(), SrcDocs.cend(), newerThanDst);
        if (newer == SrcDocs.cend()) {
            // there're no SRC_TABLEs newer than DST_TABLE
            if (Options.Resume) {
                PrintVerbose("DST_TABLE exists and is up-to-date, using '--resume' to skip the indexing phase");
                NeedToIndex = false;
            } else {
                ythrow yexception() << "DST_TABLE already exists and is up-to-date, you can use:" << Endl
                                    << " * '--resume' amd '--publish' options to publish it" << Endl
                                    << " * '--force' to overwrite it" << Endl
                                    << " * '--timestamp' or '--dst' to use a different destination path" << Endl;
            }
        } else {
            ythrow yexception() << "DST_TABLE exists and is OLDER than SRC_TABLE " << newer->Path_.Quote()
                                << ", use '--force' option to overwrite it or pick a different TIMESTAMP or DST_TABLE";
        }
    }
}

void TYTIndexer::InitTmpTables() {
    TmpTables.clear();
    if (!NeedToIndex) {
        return ;
    }
    TmpTables["processedDocuments"] = NYT::TRichYPath(Options.TmpDir + "/processed.docs");
    auto keys = ShardIdKeyColumns;
    keys.Add("segment_id");
    TmpTables["sortedDocuments"] = NYT::TRichYPath(Options.TmpDir + "/sorted.docs").SortedBy(keys);
    if (Options.SplitSegments) {
        TmpTables["indexPartsHdr"] = NYT::TRichYPath(Options.TmpDir + "/index_parts_hdr.blob").SortedBy(keys);
        TmpTables["indexPartsData"] = NYT::TRichYPath(Options.TmpDir + "/index_parts_data.blob").SortedBy(keys);
        TmpTables["indexResultHdr"] = NYT::TRichYPath(Options.TmpDir + "/index_hdr.blob").SortedBy(ShardIdKeyColumns);
        TmpTables["indexResultData"] = NYT::TRichYPath(Options.TmpDir + "/index_data.blob").SortedBy(ShardIdKeyColumns);
        TmpTables["indexDataRename"] = NYT::TRichYPath(Options.TmpDir + "/index_data.rename").SortedBy(ShardIdKeyColumns);
    } else {
        TmpTables["indexResult"] = NYT::TRichYPath(Options.TmpDir + "/index.blob").SortedBy(TFinalSortCommand::GetSortColumns(Options.BlobFormat));
        TmpTables["sampledShards"] = NYT::TRichYPath(Options.TmpDir + "/sampled_shards").SortedBy(keys);
        TmpTables["shardResources"] = NYT::TRichYPath(Options.TmpDir + "/shard_recources");
        TmpTables["sortedResources"] = NYT::TRichYPath(Options.TmpDir + "/shard_recources.sorted");
    }
}

// Builds the ordered chain of commands to execute in Run().
//
// When indexing is needed: process documents -> sort documents -> then either
//  * split mode: index parts -> merge parts -> (optionally rename data parts) -> final sort, or
//  * plain mode: index segments -> sort shard resources -> move the index to DST_TABLE.
// When '--publish' is set, a publish command is appended.
// Throws yexception if the resulting chain is empty.
void TYTIndexer::InitCommands() {
    if (NeedToIndex) {
        Commands.emplace_back(MakeHolder<TProcessDocumentsCommand>(
            SrcDocs,
            TYTCommand::TOutputs{TmpTables["processedDocuments"]},
            Options.Verbose,
            Report,
            TSaasYTDocumentMapper::TContext{
                Options.ServiceName,
                Options.SearchMapText,
                Options.Processor,
                Options.ProcessorOptions,
                Options.MaxDocumentsPerSegment,
                Options.SaveDeletedDocuments,
                Options.CookFormat == ECookFormat::Delta,
                Options.MinSegmentsCount
            },
            Options.ConfigFiles,
            Options.IndexingConfigBundle,
            Options.AddExtraTimestamp,
            Options.IndexerRamMapperGb,
            Options.MaxRowWeightMb,
            Options.Acl
        ));
        Commands.emplace_back(MakeHolder<TSortDocumentsCommand>(
            TYTCommand::TInputs{TmpTables["processedDocuments"]},
            TYTCommand::TOutputs{TmpTables["sortedDocuments"]},
            Options.Verbose,
            Report,
            Options.Acl
        ));
        if (Options.SplitSegments) {
            Commands.emplace_back(MakeHolder<TIndexDocumentsCommand>(
                TYTCommand::TInputs{TmpTables["sortedDocuments"]},
                TYTCommand::TOutputs{TmpTables["indexPartsHdr"], TmpTables["indexPartsData"]},
                Options.Verbose,
                Report,
                Options.IndexingConfigBundle,
                Options.ConfigFiles,
                Options.FailOnIndexError,
                Options.DumperChunkSize,
                Options.UseDatalessPartsOnMerge,
                !Options.IndexerModulesToRun.empty(),
                Options.Acl
            ));
            Commands.emplace_back(MakeHolder<TMergeIndexPartsCommand>(
                TYTCommand::TInputs{TmpTables["indexPartsHdr"]},
                TYTCommand::TOutputs{TmpTables["indexResultHdr"], TmpTables["indexDataRename"]},
                Options.Verbose,
                Report,
                Options.MergingConfigBundle,
                Options.ConfigFiles,
                Options.DumperChunkSize,
                Options.UseDatalessPartsOnMerge,
                !Options.MergerModulesToRun.empty(),
                Options.IndexerRamGb,
                Options.IndexerRamReserveFactor,
                Options.IndexerTmpfsGb,
                Options.IndexerCpu,
                Options.Acl
            ));
            TYTCommand::TInputs finalSortInputs;
            if (!Options.UseDatalessPartsOnMerge) {
                finalSortInputs = TYTCommand::TInputs{TmpTables["indexResultHdr"]};
            } else {
                // With dataless parts the data table is renamed separately and joins the final sort.
                Commands.emplace_back(MakeHolder<TRenameIndexPartsDataCommand>(
                    TYTCommand::TInputs{TmpTables["indexPartsData"], TmpTables["indexDataRename"]},
                    TYTCommand::TOutputs{TmpTables["indexResultData"]},
                    Options.Verbose,
                    Report,
                    Options.Acl
                ));
                finalSortInputs = TYTCommand::TInputs{TmpTables["indexResultHdr"], TmpTables["indexResultData"]};
            }
            Commands.emplace_back(MakeHolder<TFinalSortCommand>(
                finalSortInputs,
                TYTCommand::TOutputs{DstIndex},
                Options.Verbose,
                Report,
                Options.BlobFormat,
                Options.Acl
            ));
        } else {
            auto outputs = TYTCommand::TOutputs{TmpTables["indexResult"], TmpTables["sampledShards"], TmpTables["shardResources"]};
            if (Options.FilterIncorrectDocs) {
                outputs.push_back(Options.IncorrectDocsDst);
            }
            Commands.emplace_back(MakeHolder<TIndexSegmentsCommand>(
                TYTCommand::TInputs{TmpTables["sortedDocuments"]},
                outputs,
                Options.Verbose,
                Report,
                Options.IndexingConfigBundle,
                Options.ConfigFiles,
                Options.IndexerRamGb,
                Options.IndexerRamReserveFactor,
                Options.IndexerTmpfsGb,
                Options.IndexerCpu,
                Options.SampleShards,
                Options.SampleUrlsNum,
                Options.FailOnIndexError,
                Options.BlobFormat,
                Options.EnableSkyShare,
                Options.DumperChunkSize,
                !Options.IndexerModulesToRun.empty(),
                Options.FilterIncorrectDocs,
                Options.Acl
            ));
            Commands.emplace_back(MakeHolder<TShardResourceSortCommand>(
                TYTCommand::TInputs{TmpTables["shardResources"]},
                TYTCommand::TOutputs{Options.ResourceDst},
                Options.Verbose,
                Report,
                Options.Acl
            ));
            Commands.emplace_back(MakeHolder<TMoveCommand>(
                TYTCommand::TInputs{TmpTables["indexResult"]},
                TYTCommand::TOutputs{DstIndex},
                Options.Verbose,
                Report
            ));
        }
    }
    if (Options.Publish) {
        // The publish command is the same in both modes except for its inputs:
        // split mode publishes the index only, plain mode also passes the resources table.
        // TODO: support .resources table in split mode and always pass both inputs
        TYTCommand::TInputs publishInputs = Options.SplitSegments
            ? TYTCommand::TInputs{DstIndex}
            : TYTCommand::TInputs{DstIndex, Options.ResourceDst};
        Commands.emplace_back(MakeHolder<TPublishCommand>(
            std::move(publishInputs),
            TYTCommand::TOutputs{},
            Options.Verbose,
            Report,
            TPublishCommand::TContext{
                Options.SearchMapText,
                Options.ServiceName,
                TInstant::Seconds(Options.ProcessorOptions.Timestamp),
                Options.CommandLine,
                SrcDocs,
                Options.SnapshotManager,
                Options.SnapshotManagerContext,
                Options.EnableSkyShare
            }
        ));
    }
    if (Commands.empty()) {
        ythrow yexception() << "Nothing to do. Use '--publish' option to publish index, '--force' to overwrite it or pick a different TIMESTAMP" << Endl;
    }
}

// Removes every registered temporary table that currently exists, reporting each
// removal in verbose mode.
void TYTIndexer::DeleteTmpTables() {
    for (const auto& entry : TmpTables) {
        const auto& tablePath = entry.second.Path_;
        // The existence check only keeps the verbose output free of tables that were never created.
        if (!ClientPtr->Exists(tablePath)) {
            continue;
        }
        ClientPtr->Remove(tablePath, NYT::TRemoveOptions().Force(true));
        PrintVerbose("Removed: " + tablePath.Quote());
    }
}

void TYTIndexer::SaveSampleTable() {
    ClientPtr->Copy(TmpTables["sampledShards"].Path_,
                    NYT::TYPath(Options.DstDir + "/sampled_shards"),
                    NYT::TCopyOptions().Force(true));
}

// Writes `msg` to the info log with a "TYTIndexer: " prefix; does nothing unless
// verbose mode is on.
void TYTIndexer::PrintVerbose(const TString& msg) {
    if (!Options.Verbose) {
        return;
    }
    INFO_LOG << "TYTIndexer: " << msg << Endl;
}

// Executes the prepared command chain.
//
// All specs are prepared first; with '--dry-run' nothing else happens. Otherwise temp
// tables are cleaned (unless resuming), the commands run in order, the sample table is
// saved when requested, temp tables are removed (unless '--keep-temps') and the launch
// report is written when a report path is configured.
// Any exception thrown by a command is rethrown after the report has been flushed.
void TYTIndexer::Run() {
    for (auto& it : Commands) {
        it->PrepareSpec();
    }
    if (Options.DryRun) {
        return;
    }
    if (NeedToIndex && !Options.Resume) {
        DeleteTmpTables();
    }
    bool resumeFlag = Options.Resume; // if any operations in the chain starts running, the resume mode is cancelled
    try {
        for (auto& it : Commands) {
            it->Run(ClientPtr.Get(), resumeFlag);
        }
    } catch (...) {
        // Flush only when a report path is configured, exactly like the success path below;
        // presumably flushing to an empty path could throw here and hide the original
        // error - TODO confirm against TYTLaunchReport::Flush.
        if (Options.ReportFilePath) {
            Report.Flush(Options.ReportFilePath);
        }
        throw;
    }

    if (Options.SampleShards) {
        SaveSampleTable();
    }

    if (NeedToIndex && !Options.KeepTemps) {
        DeleteTmpTables();
    }

    if (Options.ReportFilePath) {
        Report.Flush(Options.ReportFilePath);
    }
}
