#include "split_by_tag_job.h"
#include "split_by_tag_mapper.h"

#include <crypta/cm/offline/bin/common/match_schema.h>
#include <crypta/lib/native/range/packs.h>
#include <crypta/lib/native/yt/utils/helpers.h>
#include <crypta/lib/native/yt/utils/timed_yt_path_generator.h>

#include <mapreduce/yt/common/config.h>
#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/util/ypath_join.h>

#include <util/string/join.h>

using namespace NCrypta::NOfflineCm;

// Constructs the job from its configuration and the logger used for progress messages.
//
// config - job settings; Do() reads YtProxy, YtPool, SourceDir, TablePackSize,
//          DestinationDirs and ShouldDropInput from it.
// log    - logger handle; must not be null.
TSplitByTagJob::TSplitByTagJob(const TConfig& config, NLog::TLogPtr log)
    : Config(config)
    , Log(log)
{
    // Reject a null logger up front: Do() dereferences Log unconditionally.
    Y_ENSURE(log != nullptr);
}

// Runs the split-by-tag job.
//
// Lists the tables in Config.SourceDir, groups them into packs of Config.TablePackSize
// and, for every pack, inside its own YT transaction:
//   1. picks a fresh destination table path in each directory of Config.DestinationDirs
//      (one output table per tag) and makes sure the directory exists;
//   2. runs a map operation with TSplitByTagMapper over the pack, writing to those tables;
//   3. removes the source tables of the pack when Config.ShouldDropInput is set;
//   4. commits the transaction.
//
// Returns 0 when all packs have been processed.
// NOTE(review): failures are presumably reported via exceptions thrown by the YT client —
// no error code other than 0 is ever returned from here.
int TSplitByTagJob::Do() {
    Log->info("================ Start ================");

    auto client = NYT::CreateClient(Config.YtProxy);
    NYT::TConfig::Get()->Pool = Config.YtPool;

    Log->info("Look for source tables in {}", Config.SourceDir);
    const auto& sourceTables = List(client, Config.SourceDir, TListOptions().Absolute(true));
    Log->info("List of source tables [\n{}\n]", JoinSeq(",\n", sourceTables));

    for (const auto& pack: GetPacks(sourceTables, Config.TablePackSize)) {
        // Everything done for one pack (directory creation, map, optional input removal)
        // goes through this transaction and is committed at the end of the iteration.
        auto tx = client->StartTransaction();

        Log->info("Split by tag source tables [\n{}\n]", JoinSeq(",\n", pack));

        TTimedYtPathGenerator pathGenerator;
        TSplitByTagMapper::TOutputIndexes::TBuilder outputBuilder;
        for (const auto& [tag, dir]: Config.DestinationDirs) {
            // Output table for this tag: generated path inside `dir`, unsorted match schema,
            // scan-optimized storage format.
            const auto destinationTable = NYT::TRichYPath(pathGenerator.GetPath(dir)).Schema(GetUnsortedMatchSchema()).OptimizeFor(NYT::OF_SCAN_ATTR);
            outputBuilder.Add(destinationTable, tag);
            Log->info("Destination table for {} is {}", tag, destinationTable.Path_);
            // The node created here is the destination directory, not the table itself,
            // so the directory path is what gets logged.
            Log->info("Create {} if it not exists", dir);
            tx->Create(dir, NYT::NT_MAP, NYT::TCreateOptions().Recursive(true).IgnoreExisting(true));
        }

        NYT::TMapOperationSpec spec;
        AddInputs<NYT::TNode>(spec, pack);
        AddOutputs<NYT::TNode>(spec, outputBuilder.GetTables());

        // The mapper receives the indexes collected by outputBuilder alongside the output tables.
        tx->Map(spec, new TSplitByTagMapper(outputBuilder.GetIndexes()));

        if (Config.ShouldDropInput) {
            Log->info("Remove source tables [\n{}\n]", JoinSeq(",\n", pack));
            // Removal happens in the same transaction as the map operation.
            RemoveTables(tx, pack);
        }

        tx->Commit();
    }

    Log->info("================ Finish ===============");
    return 0;
}
