#include <mapreduce/yt/interface/client.h>

#include <saas/api/action.h>
#include <saas/api/mr_client/processors/processors.h>
#include <saas/library/hash_to_block_mode/hash_to_block_mode.h>

#include <library/cpp/string_utils/url/url.h>

#include <util/generic/size_literals.h>
#include <util/generic/yexception.h>
#include <util/stream/input.h>
#include <util/stream/output.h>
#include <util/string/cast.h>

class TBlockMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
    TString Format;
    ui64 BlockCount;
public:
    TBlockMapper() = default;

    TBlockMapper(const TString format, const ui64 count) : Format(std::move(format)), BlockCount(count) {
    }

    Y_SAVELOAD_JOB(Format, BlockCount);

    void Do(NYT::TTableReader<NYT::TNode>* input, NYT::TTableWriter<NYT::TNode>* output) final {
        NSaas::TRowProcessorOptions processorOptions;
        auto processor = NSaas::IRowProcessor::TFactory::Construct(Format, processorOptions);
                
        for (; input->IsValid(); input->Next()) {
            NYT::TNode node;
            auto action = processor->ProcessRowSingle(input->GetRow());
            TString url = action.GetDocument().GetUrl();
            TString kps = ToString(action.GetPrefix());
            TBlockHash blockHash(BlockCount);

            node["key"] = ToString(blockHash.GetBlockKey(kps, url));
            node["subkey"] = blockHash.GetGtaKey(kps, url);
            node["value"] = action.ToProtobuf().GetDocument().SerializeAsString();

            output->AddRow(node);
        }
    }
};
REGISTER_MAPPER(TBlockMapper);

class TBlockReducer : public NYT::IReducer<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>> {
public:
    void Do(NYT::TTableReader<NYT::TNode>* input, NYT::TTableWriter<NYT::TNode>* output) final {
        NYT::TNode msg;
        NYT::TNode stat;
        ui64 num = 0;
        bool init = true; 
        NSaas::TAction action;
        auto& doc = action.AddDocument();

        for (; input->IsValid(); input->Next(), num++) {
            auto& row = input->GetRow();
            if (init) {
                msg["key"] = row["key"];
                doc.SetUrl(row["key"].AsString());
                init = false;
            }
            doc.AddProperty(row["subkey"].AsString(), row["value"].AsString()); 
        }
        msg["value"] = action.ToProtobuf().SerializeAsString();
        output->AddRow(msg, 0);

        stat["key"] = msg["key"];
        stat["Count"] = ToString(num); 
        output->AddRow(stat, 1);
    }
};
REGISTER_REDUCER(TBlockReducer);

int main(int argc, const char** argv) {
    NYT::Initialize(argc, argv);

    auto client = NYT::CreateClient(argv[1]); 

    const TString inputTable = argv[2];
    const TString outputTable = argv[3];
    const TString statisticsTable = argv[4];

    ui64 totalDocCount = std::stoi(argv[6]);
    ui64 maxBlockDocCount = std::stoi(argv[7]);
    const ui64 blockCount = (totalDocCount + maxBlockDocCount - 1) / maxBlockDocCount;

    client->MapReduce(
        NYT::TMapReduceOperationSpec()
            .ReduceBy({"key"})
            .AddInput<NYT::TNode>(inputTable)
            .AddOutput<NYT::TNode>(outputTable) // 0
            .AddOutput<NYT::TNode>(statisticsTable), // 1
        new TBlockMapper(argv[5], blockCount),
        new TBlockReducer);
    return 0;
}
