#include "get_ext_id_mapper.h"

#include "fields.h"
#include "prepare_to_sync_db_errors_schema.h"
#include "prepare_to_sync_db_job.h"
#include "prepare_to_identify_reducer.h"

#include <crypta/lib/native/iscrypta_log_parser/iscrypta_log_fields.h>
#include <crypta/lib/native/range/packs.h>
#include <crypta/lib/native/sampler/rest_sampler.h>
#include <crypta/lib/native/yt/processed_tables_tracker/processed_tables_tracker.h>
#include <crypta/lib/native/yt/utils/helpers.h>
#include <crypta/lib/native/yt/utils/timed_yt_path_generator.h>

#include <mapreduce/yt/common/config.h>
#include <mapreduce/yt/interface/common.h>
#include <mapreduce/yt/util/temp_table.h>

#include <util/string/join.h>

using namespace NCrypta;
using namespace NCrypta::NCm::NPrepareSync;

namespace {
    /// Builds the schema of the "to identify" output table.
    ///
    /// The table has exactly two columns, TAG and EXT_ID (names come from NFields).
    /// Both are required strings sorted in ascending order, the schema is strict,
    /// and the key columns are declared unique.
    NYT::TTableSchema GetIdentifySchema() {
        using namespace NFields;

        // Both key columns share the same type, requiredness and sort order,
        // so only the column name varies.
        const auto makeKeyColumn = [](const auto& columnName) {
            return NYT::TColumnSchema()
                .Name(columnName)
                .Type(NYT::VT_STRING, /*required*/ true)
                .SortOrder(NYT::SO_ASCENDING);
        };

        return NYT::TTableSchema()
            .AddColumn(makeKeyColumn(TAG))
            .AddColumn(makeKeyColumn(EXT_ID))
            .Strict(true)
            .UniqueKeys(true);
    }
}

TPrepareToSyncDbJob::TPrepareToSyncDbJob(const TConfig& config, NLog::TLogPtr log)
    : Config(config)
    , Log(log)
{
    Y_ENSURE(Log != nullptr);
}

int TPrepareToSyncDbJob::Do() {
    using namespace NIscryptaLogFields;
    using namespace NFields;

    Log->info("================ Start ================");

    auto client = NYT::CreateClient(Config.GetYtProxy());
    NYT::TConfig::Get()->Pool = Config.GetYtPool();

    TProcessedTablesTracker processedTablesTracker({.SourceDir = Config.GetSourceDir(), .StateTable = Config.GetTrackTable()});

    Log->info("Look for source tables in {}", Config.GetSourceDir());
    const auto& sourceTables = processedTablesTracker.GetUnprocessedTables(client);
    Log->info("List of source tables [\n{}\n]", JoinSeq(",\n", sourceTables));

    const TRestSampler sampler(Config.GetSamplerDenominator(), Config.GetSamplerRest(), TRestSampler::EMode::Equal);

    for (const auto& pack: GetPacks(sourceTables, Config.GetMaxTablesToProcess())) {
        auto tx = client->StartTransaction();

        Log->info("Parse source tables [\n{}\n]", JoinSeq(",\n", pack));

        TTimedYtPathGenerator pathGenerator;
        const auto& toIdentifyTablePath = pathGenerator.GetPath(Config.GetToIdentifyDir());
        const auto& errorsTablePath = pathGenerator.GetPath(Config.GetErrorsDir());

        Log->info("To identify table is {}", toIdentifyTablePath);
        Log->info("Errors table is {}", errorsTablePath);

        {
            NYT::TTempTable toIdentifyTmp(tx);

            TGetExtIdMapper::TOutputIndexes::TBuilder outputBuilder;
            outputBuilder.Add(toIdentifyTmp.Name(), TGetExtIdMapper::EOutputTables::ToIdentify);
            outputBuilder.Add(
                NYT::TRichYPath(errorsTablePath).Schema(GetPrepareToSyncDbErrorsSchema()).OptimizeFor(NYT::OF_SCAN_ATTR),
                TGetExtIdMapper::EOutputTables::Errors);

            NYT::TMapOperationSpec mapSpec;
            AddInputs<NYT::TNode>(mapSpec, GetTablesWithColumns(pack, {REQUEST, UNIXTIME}));
            AddOutputs<NYT::TNode>(mapSpec, outputBuilder.GetTables());

            tx->Map(
                mapSpec,
                new TGetExtIdMapper(outputBuilder.GetIndexes(), sampler)
            );
            tx->Sort(
                toIdentifyTmp.Name(),
                toIdentifyTmp.Name(),
                {TAG, EXT_ID}
            );
            tx->Reduce(
                new TPrepareToIdentifyReducer,
                toIdentifyTmp.Name(),
                NYT::TRichYPath(toIdentifyTablePath).Schema(GetIdentifySchema()).OptimizeFor(NYT::OF_SCAN_ATTR),
                {TAG, EXT_ID}
            );
        }

        processedTablesTracker.AddProcessedTables(tx, pack);

        tx->Commit();

        SetTtl(client, errorsTablePath, TDuration::Days(Config.GetErrorsTtlDays()), ESetTtlMode::RemoveIfEmpty);
    }

    Log->info("================ Finish ================");
    return 0;
}
