#include "parse_cm_access_log_errors_schema.h"
#include "parse_cm_access_log_job.h"
#include "parse_cm_access_log_mapper.h"

#include <crypta/cm/offline/bin/common/match_schema.h>
#include <crypta/lib/native/access_log/access_log_fields.h>
#include <crypta/lib/native/range/packs.h>
#include <crypta/lib/native/yt/processed_tables_tracker/processed_tables_tracker.h>
#include <crypta/lib/native/yt/utils/helpers.h>
#include <crypta/lib/native/yt/utils/timed_yt_path_generator.h>

#include <mapreduce/yt/common/config.h>
#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/util/ypath_join.h>

#include <util/string/join.h>

using namespace NCrypta;
using namespace NCrypta::NOfflineCm;

/// Builds the job from its configuration and a logger.
///
/// @param config  Job settings; used to initialize the Config member.
/// @param log     Logger handle; used to initialize the Log member. Must not be null,
///                otherwise the Y_ENSURE check in the body fails.
TParseCmAccessLogJob::TParseCmAccessLogJob(const TConfig& config, NLog::TLogPtr log)
    : Config(config)
    , Log(log)
{
    // Do() writes to Log without any null checks, so a missing logger is refused up front.
    Y_ENSURE(log != nullptr);
}

int TParseCmAccessLogJob::Do() {
    Log->info("================ Start ================");

    auto client = NYT::CreateClient(Config.YtProxy);
    NYT::TConfig::Get()->Pool = Config.YtPool;

    TProcessedTablesTracker processedTablesTracker({.SourceDir = Config.SourceDir, .StateTable = Config.TrackTable});

    Log->info("Look for source tables in {}", Config.SourceDir);
    const auto& sourceTables = processedTablesTracker.GetUnprocessedTables(client);
    Log->info("List of source tables [\n{}\n]", JoinSeq(",\n", sourceTables));

    for (const auto& pack: GetPacks(sourceTables, Config.TablePackSize)) {
        auto tx = client->StartTransaction();

        Log->info("Parse source tables [\n{}\n]", JoinSeq(",\n", pack));

        const auto& pathGenerator = TTimedYtPathGenerator();

        NYT::TYPath destinationTablePath = pathGenerator.GetPath(Config.DestinationDir);
        Log->info("Destination table is {}", destinationTablePath);

        NYT::TYPath errorsTablePath = pathGenerator.GetPath(Config.ErrorsDir);
        Log->info("Errors table is {}", errorsTablePath);

        TParseCmAccessLogMapper::TOutputIndexes::TBuilder outputBuilder;
        outputBuilder.Add(
            NYT::TRichYPath(destinationTablePath).Schema(GetUnsortedMatchSchema()).OptimizeFor(NYT::OF_SCAN_ATTR),
            TParseCmAccessLogMapper::EOutputTables::Matches);
        outputBuilder.Add(
            NYT::TRichYPath(errorsTablePath).Schema(GetParseCmAccessLogErrorsSchema()).OptimizeFor(NYT::OF_SCAN_ATTR),
            TParseCmAccessLogMapper::EOutputTables::Errors);

        auto spec = NYT::TMapOperationSpec();
        AddInputs<NYT::TNode>(spec, GetTablesWithColumns(pack, {NAccessLogFields::UNIXTIME, NAccessLogFields::QUERY, NAccessLogFields::BODY, NAccessLogFields::REPLY, NAccessLogFields::HTTP_CODE}));
        AddOutputs<NYT::TNode>(spec, outputBuilder.GetTables());

        tx->Map(spec, new TParseCmAccessLogMapper(outputBuilder.GetIndexes()));

        processedTablesTracker.AddProcessedTables(tx, pack);

        tx->Commit();

        SetTtl(client, errorsTablePath, TDuration::Days(Config.ErrorsTtlDays), ESetTtlMode::RemoveIfEmpty);
    }

    Log->info("================ Finish ================");
    return 0;
}
