#include <atomic>
#include <exception>

#include <util/thread/pool.h>

#include <mapreduce/yt/interface/protos/yamr.pb.h>

#include <robot/library/yt/static/command.h>

#include <wmconsole/version3/wmcutil/log.h>
#include <wmconsole/version3/wmcutil/regex.h>
#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/wmcutil/yt/yt_utils.h>

#include "config.h"
#include "utils.h"
#include "task_merge.h"
#include "workflow.h"

namespace NWebmaster {

using namespace NJupiter;

// Runs a single sort command that takes every table returned by
// NYTUtils::GetTableList() for `root` as input and writes the result,
// sorted by ("key", "subkey"), into `output`.
//
//   client - YT client (or transaction) the listing and the sort are issued through
//   root   - path whose table list provides the sort inputs
//   output - path of the destination table
//
// Every input, the output and the completion are reported via LOG_INFO.
void MergeIntermediateShards(NYT::IClientBasePtr client, const TString &root, const TString &output) {
    TDeque<NYTUtils::TTableInfo> shards;
    NYTUtils::GetTableList(client, root, shards);

    TSortCmd<NYT::TYamr> sortCmd(client);
    for (auto shard = shards.begin(); shard != shards.end(); ++shard) {
        // Register the shard as an input first, then report it.
        sortCmd.Input(TTable<NYT::TYamr>(client, shard->Name));
        LOG_INFO("merge, input %s", shard->Name.c_str());
    }

    LOG_INFO("merge, output %s", output.c_str());
    sortCmd.Output(TTable<NYT::TYamr>(client, output)).By({"key", "subkey"}).Do();

    LOG_INFO("merge, root %s - done", root.c_str());
}

int TaskMerge(int, const char **) {
    const time_t snapshotCurrentProcessedAt = Now().Seconds();

    struct TMergeConfig {
        TMergeConfig(const TString &root, const TString &output)
            : Root(root)
            , Output(output)
        {
        }

    public:
        TString Root;
        TString Output;
    };

    NYT::IClientPtr client = NYT::CreateClient(TCommonYTConfig::CInstance().MR_SERVER_HOST_JUPITER);

    time_t prevSnapshotTs = 0, currSnapshotTs = 0;
    if (!TWorkflow::Instance().GetInProgressSnapshots(client, prevSnapshotTs, currSnapshotTs)) {
        LOG_INFO("merge, there is no unprocessed snapshots");
        return 0;
    }

    const auto &cfg = TConfig::CInstance();
    const TDeque<TMergeConfig> tablesToMerge = {
        { cfg.TABLE_INTERMEDIATE_REPORT,        cfg.TABLE_READY_REPORT + "_" + ToString(snapshotCurrentProcessedAt) },
        { cfg.TABLE_INTERMEDIATE_ARCHIVE_INT,   cfg.TABLE_READY_ARCHIVE_INT },
        { cfg.TABLE_INTERMEDIATE_ARCHIVE_EXT,   cfg.TABLE_READY_ARCHIVE_EXT },
        { cfg.TABLE_INTERMEDIATE_TOP_INT,       cfg.TABLE_READY_TOP_INT },
        { cfg.TABLE_INTERMEDIATE_TOP_EXT,       cfg.TABLE_READY_TOP_EXT },
        { cfg.TABLE_INTERMEDIATE_TOP_NEW_EXT,   cfg.TABLE_READY_TOP_NEW_EXT },
        { cfg.TABLE_INTERMEDIATE_TOP_GONE_EXT,  cfg.TABLE_READY_TOP_GONE_EXT },
    };

    NYT::ITransactionPtr tx = client->StartTransaction();

    LOG_INFO("merge, intermediate shards %ld", currSnapshotTs);

    bool valid = true;
    THolder<IThreadPool> queue(CreateThreadPool(8));
    for (const auto &mc : tablesToMerge) {
        queue->SafeAddFunc([=, &tx, &valid]() {
            const TString root = NUtils::ReplaceAll(mc.Root, "$ts", ToString(currSnapshotTs));
            try {
                MergeIntermediateShards(tx, root, mc.Output);
                TWorkflow::Instance().SetMergeSource(tx, mc.Output, prevSnapshotTs, currSnapshotTs);
            } catch (yexception &e) {
                valid = false;
                LOG_ERROR("merge, intermediate shard %s build error: %s", root.data(), e.what());
            }
        });
    }
    queue->Stop();

    if (valid) {
        TWorkflow::Instance().EndProcessSnapshots(tx);
        tx->Commit();
    }

    LOG_INFO("merge, intermediate shards - done");

    return 0;
}

} //namespace NWebmaster
