
#include <library/cpp/yson/node/node.h>
#include <mapreduce/yt/interface/operation.h>
#include <mapreduce/yt/interface/init.h>
#include <mapreduce/yt/interface/client.h>

#include <mail/so/spamstop/tools/text2shingles/lib/text2shingles.h>
#include <library/cpp/langs/langs.h>
#include <util/string/join.h>


/// Map job that copies each input row to the output with one extra column
/// holding the comma-joined shingles computed from a text column.
///
/// A row is dropped (nothing is written for it) when the source column is
/// absent, when it does not hold a string, or when no shingles are produced.
class TMapper : public NYT::IMapper<NYT::TTableReader<NYT::TNode>, NYT::TTableWriter<NYT::TNode>>
{
public:
    /// Default constructor; the column names stay empty until they are set
    /// by the other constructor or restored through Y_SAVELOAD_JOB.
    TMapper() = default;

    /// @param sourceFieldName  column the text is read from
    /// @param targetFieldName  column the joined shingles are written to
    explicit TMapper(TString sourceFieldName, TString targetFieldName)
        : sourceFieldName(std::move(sourceFieldName))
        , targetFieldName(std::move(targetFieldName))
    {
    }

    // Both column names take part in the job's save/load state.
    Y_SAVELOAD_JOB(sourceFieldName, targetFieldName);

    /// Walks the whole input and emits an augmented copy of every row that
    /// yields a non-empty shingle list.
    void Do(NYT::TTableReader<NYT::TNode>* input, NYT::TTableWriter<NYT::TNode>* output) override
    {
        for (; input->IsValid(); input->Next()) {
            const auto& row = input->GetRow();

            // The short-circuit keeps the column lookup from running on rows
            // that do not have the source column at all.
            const bool hasTextColumn = row.HasKey(sourceFieldName) && row[sourceFieldName].IsString();

            if (hasTextColumn) {
                const auto& tokens = NText2Shingles::Text2Shingles(row[sourceFieldName].AsString(), LANG_UNK, true);

                if (!tokens.empty()) {
                    // Copy of the input row plus the new (or overwritten) target column.
                    output->AddRow(NYT::TNode(row)(targetFieldName, JoinSeq(",", tokens)));
                }
            }
        }
    }

private:
    TString sourceFieldName;
    TString targetFieldName;
};

// Registers TMapper with the YT client library as a mapper job class.
REGISTER_MAPPER(TMapper)

/**
 * Command-line entry point.
 *
 * Usage: <prog> src_table dst_table source_field_name target_field_name
 *
 * Runs a map operation on the "hahn" cluster that reads src_table and writes
 * to dst_table, using TMapper to add target_field_name (comma-joined shingles
 * of source_field_name) to every row that produces shingles.
 *
 * @return 1 when fewer than four positional arguments are given, 0 otherwise.
 */
int main(int argc, const char ** argv)
{
    // Must be the first call in main for programs using the YT client library.
    NYT::Initialize(argc, argv);

    // Skip the program name so that argv[0..3] are the positional arguments.
    const char * progName = argv[0];
    argc --;
    argv ++;

    if(argc < 4) {
        Cerr << "Usage: " << progName << " src_table dst_table source_field_name target_field_name" << Endl;
        return 1;
    }

    // Held by value: the TStrings are constructed from C strings, so there is
    // no existing object for a reference to bind to.
    const TString srcTable = argv[0];
    const TString dstTable = argv[1];
    TString sourceField = argv[2];
    TString targetField = argv[3];

    auto client = NYT::CreateClient("hahn");

    // Create the destination table up front if it is missing.
    if(!client->Exists(dstTable))
        client->Create(dstTable, NYT::NT_TABLE);

    Y_VERIFY(client->Exists(dstTable));

    NYT::TMapOperationSpec spec;
    spec
            .AddInput<NYT::TNode>(srcTable)
            .AddOutput<NYT::TNode>(dstTable);

    // The field names are moved into the mapper instance passed to Map().
    client->Map(spec, new TMapper(std::move(sourceField), std::move(targetField)));

    return 0;
}
