
#include <library/cpp/yson/node/node.h>
#include <mapreduce/yt/interface/operation.h>
#include <mapreduce/yt/interface/init.h>
#include <mapreduce/yt/interface/client.h>

#include <library/cpp/getopt/last_getopt.h>

#include <util/folder/path.h>
#include <util/generic/hash_set.h>
#include <util/string/join.h>

/// Mapper that forwards every input row, unchanged, to the output table whose
/// index is registered for the row's ("channel", "subchannel") string columns.
class TMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    TMapper() = default;

    /// @param channelsToPathsIndexes  channel -> subchannel -> output table index.
    explicit TMapper(THashMap<TString, THashMap<TString, size_t>> channelsToPathsIndexes) noexcept
        : ChannelsToPathsIndexes(std::move(channelsToPathsIndexes))
    {
    }

    // The routing map is the only state saved/loaded with the job.
    Y_SAVELOAD_JOB(ChannelsToPathsIndexes);

    /// Uses the schema of input table 0 as the schema of every output table
    /// index present in the routing map.
    void PrepareOperation(const NYT::IOperationPreparationContext &context,
                          NYT::TJobOperationPreparer &preparer) const override {
        const auto &inputSchema = context.GetInputSchema(/* tableIndex */ 0);
        for (const auto &channelEntry : ChannelsToPathsIndexes) {
            for (const auto &subchannelEntry : channelEntry.second) {
                preparer.OutputSchema(/* tableIndex */ subchannelEntry.second, inputSchema);
            }
        }
    }

    /// Routes each row by its "channel" and "subchannel" values.
    /// `at()` is used for both lookups, so a pair that is absent from the
    /// routing map raises instead of silently picking a table.
    void Do(NYT::TTableReader<NYT::TNode> *input, NYT::TTableWriter<NYT::TNode> *output) override {
        while (input->IsValid()) {
            auto row = input->MoveRow();

            // Resolve the channel first, then the subchannel within it.
            const auto &indexBySubchannel = ChannelsToPathsIndexes.at(row.ChildAsString("channel"));
            const size_t outputIndex = indexBySubchannel.at(row.ChildAsString("subchannel"));

            output->AddRow(std::move(row), outputIndex);
            input->Next();
        }
    }

private:
    THashMap<TString, THashMap<TString, size_t>> ChannelsToPathsIndexes;
};

// Register TMapper through the YT REGISTER_MAPPER macro.
// NOTE(review): presumably this is what lets the job binary instantiate the
// mapper on the cluster side — confirm against the YT C++ API docs.
REGISTER_MAPPER(TMapper)

/// Makes sure a map node (NYT::NT_MAP) exists at `path`.
///
/// The node is created with a single `Create` call carrying `IgnoreExisting`,
/// instead of a separate `Exists()` check followed by `Create()`. This removes
/// the window in which another process could create the node between the two
/// requests (which made `Create` fail), and saves one round-trip.
///
/// NOTE(review): if a node of a different type already occupies `path`, YT is
/// expected to report an error here, whereas the old Exists() check silently
/// accepted it — confirm this is the desired behavior.
///
/// @param client  YT client used to issue the request.
/// @param path    Path of the directory to create; its parent must already exist
///                (creation is not recursive).
void CreateDirIfNotExists(const NYT::IClientPtr &client, const NYT::TYPath &path) {
    client->Create(path, NYT::NT_MAP, NYT::TCreateOptions().IgnoreExisting(true));
}

int main(int argc, const char **argv) {
    NYT::Initialize(argc, argv);

    NYT::TYPath srcTable;
    NYT::TYPath dstDir;
    {
        NLastGetopt::TOpts opts;
        opts.AddLongOption("src", "table with antifraud log").StoreResult(&srcTable).Required();
        opts.AddLongOption("dst", "dir for store separated logs").StoreResult(&dstDir).Required();
        NLastGetopt::TOptsParseResult parsedOpts(&opts, argc, argv);
    }

    auto client = NYT::CreateClientFromEnv();


    CreateDirIfNotExists(client, dstDir);
    const TString filename = TFsPath(srcTable).GetName();

    THashMap<TString, THashSet<TString>> subchannelByChannels;

    for (const NYT::TTableReaderPtr<NYT::TNode> reader = client->CreateTableReader<NYT::TNode>(
            srcTable + TString{R"({"channel", "subchannel"})"});
         reader->IsValid(); reader->Next()) {

        const NYT::TNode &node = reader->GetRow();

        subchannelByChannels[node.ChildAsString("channel")].emplace(node.ChildAsString("subchannel"));
    }

    TVector<NYT::TYPath> dsts;
    THashMap<TString, THashMap<TString, size_t>> channelsToPaths;

    for (const auto&[channel, subchannels]: subchannelByChannels) {
        Cout << channel << ':' << MakeRangeJoiner(",", subchannels) << Endl;

        const NYT::TYPath channelRoot = dstDir + '/' + channel;

        CreateDirIfNotExists(client, channelRoot);

        for (const auto &subchannel: subchannels) {
            const NYT::TYPath subchannelRoot = channelRoot + '/' + subchannel;
            CreateDirIfNotExists(client, subchannelRoot);

            channelsToPaths[channel][subchannel] = dsts.size();
            dsts.emplace_back(subchannelRoot + '/' + filename);
        }
    }

    NYT::TMapOperationSpec spec;
    spec.AddInput<NYT::TNode>(NYT::TRichYPath(srcTable));
    for (const NYT::TYPath &dst : dsts) {
        spec.AddOutput<NYT::TNode>(dst);
    }
    client->Map(spec, new TMapper(std::move(channelsToPaths)));

    return 0;
}
