#include <util/generic/set.h>
#include <util/generic/size_literals.h>
#include <util/generic/size_literals.h>
#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/common/config.h>
#include <datacloud/dev_utils/data/custom_data_utils.h>
#include <datacloud/features/cluster/data/data.pb.h>


using namespace NYT;

namespace Datacloud {
namespace Features {
namespace Cluster {

// Reducer over a mixed-type input stream.
// Rows with table index 0 are read as TYuidToCidRecord and supply the cid;
// rows from any other table are read as TUrlRecord. Every TUrlRecord seen
// after a non-empty cid has been read is emitted as TCidToBowRecord{cid, host}.
class TYuidToCidReducer : public IReducer<
    TTableReader<::google::protobuf::Message>,
    TTableWriter<TCidToBowRecord> >
{
public:
    void Do(TReader* reader, TWriter* writer) override {
        TString cid;
        for (; reader->IsValid(); reader->Next()) {
            if (reader->GetTableIndex() == 0) {
                // Mapping row: remember its cid for the url rows that follow.
                cid = reader->GetRow<TYuidToCidRecord>().GetCid();
                continue;
            }
            if (cid.empty()) {
                // A url row arrived while no cid is known: stop reading.
                break;
            }
            TCidToBowRecord joined;
            joined.SetCid(cid);
            joined.SetHost(reader->GetRow<TUrlRecord>().GetHost());
            writer->AddRow(joined);
        }
    }
};
REGISTER_REDUCER(TYuidToCidReducer);

void FastYuidToCid(
    const TString& ytToken,
    const TString& cluster,
    const TString& yuidToCidTable,
    const TVector<TString>& inputTables,
    const TString& outputTable
) {
    auto client = NYT::CreateClient(
        cluster, NYT::TCreateClientOptions().Token(ytToken));

    const auto outputSchema = TTableSchema()
        .AddColumn(TColumnSchema().Type(VT_STRING).Name("cid"))  // Key
        .AddColumn(TColumnSchema().Type(VT_STRING).Name("host"));

    auto spec = TReduceOperationSpec()
        .ReduceBy({"yuid"})
        .AddInput<TYuidToCidRecord>(yuidToCidTable)
        .AddOutput<TCidToBowRecord>(TRichYPath(outputTable)
            .Schema(outputSchema));

    for (const auto& table: inputTables) {
        spec.AddInput<TUrlRecord>(table);
    }

    client->Reduce(spec, new TYuidToCidReducer);
}

// TODO: Change to reduce unique
// Deduplicates hosts: reads every row available in one Do call, collects the
// distinct host values and writes one TKeyToHostRecord per distinct host
// (in ascending host order). All output rows carry the key of the last row
// read.
class THostnameBowReducer : public IReducer<
    TTableReader<TKeyToHostRecord>,
    TTableWriter<TKeyToHostRecord> >
{
public:
    void Do(TReader* reader, TWriter* writer) override {
        // Ordered set keeps the output order identical to iterating an
        // ordered map keyed by host.
        TSet<TString> hosts;
        TString key;
        for (; reader->IsValid(); reader->Next()) {
            // Bind by reference: the row is only needed until the next
            // reader->Next(), so copying the whole message is wasted work.
            const auto& rec = reader->GetRow();
            hosts.insert(rec.GetHost());
            key = rec.GetKey();
        }
        TKeyToHostRecord outputRecord;
        outputRecord.SetKey(key);
        for (const auto& host : hosts) {
            outputRecord.SetHost(host);
            writer->AddRow(outputRecord);
        }
    }
};
REGISTER_REDUCER(THostnameBowReducer);


void FastHostnameBow(
    const TString& ytToken,
    const TString& cluster,
    const TVector<TString>& inputTables,
    const TString& outputTable
) {
    auto client = NYT::CreateClient(
        cluster, NYT::TCreateClientOptions().Token(ytToken));

    const auto outputSchema = TTableSchema()
        .AddColumn(TColumnSchema().Type(VT_STRING).Name("host")) 
        .AddColumn(TColumnSchema().Type(VT_STRING).Name("key"));

    auto spec = TReduceOperationSpec()
        .ReduceBy({"key"})
        .AddOutput<TKeyToHostRecord>(TRichYPath(outputTable)
            .Schema(outputSchema));

    for (const auto& table: inputTables) {
        spec.AddInput<TKeyToHostRecord>(table);
    }

    client->Reduce(spec, new THostnameBowReducer);
}


// Attaches a host's feature string to every key row that follows it.
// Rows with table index 0 are read as THostToVecRecord; their VectorB and
// VectorM fields are concatenated into one string. Rows from other tables are
// read as TKeyToHostRecord and emitted as TExtIdToFeaturesRecord{key, features}
// as long as a non-empty feature string is known.
class TAppendHostVectorReducer : public IReducer<
    TTableReader<::google::protobuf::Message>,
    TTableWriter<TExtIdToFeaturesRecord> >
{
public:
    void Do(TReader* reader, TWriter* writer) override {
        TString features;
        for (; reader->IsValid(); reader->Next()) {
            if (reader->GetTableIndex() == 0) {
                const auto& hostVec = reader->GetRow<THostToVecRecord>();
                features = hostVec.GetVectorB() + hostVec.GetVectorM();
                continue;
            }
            if (features.empty()) {
                // Nothing to attach to this or any later row: stop reading.
                break;
            }
            TExtIdToFeaturesRecord enriched;
            enriched.SetKey(reader->GetRow<TKeyToHostRecord>().GetKey());
            enriched.SetFeatures(features);
            writer->AddRow(enriched);
        }
    }
};
REGISTER_REDUCER(TAppendHostVectorReducer);


void FastAppendHostVectorReducer(
    const TString& ytToken,
    const TString& cluster,
    const TString& hostToVectorTable,
    const TString& keyToHostTable,
    const TString& outputTable
) {
    auto client = NYT::CreateClient(
        cluster, NYT::TCreateClientOptions().Token(ytToken));

    const auto outputSchema = TTableSchema()
        .AddColumn(TColumnSchema().Type(VT_STRING).Name("key"))
        .AddColumn(TColumnSchema().Type(VT_STRING).Name("features"));

    auto spec = TJoinReduceOperationSpec()
        .ReducerSpec(TUserJobSpec().CpuLimit(0.3))
        .JoinBy({"host"})
        .AddInput<THostToVecRecord>(
            TRichYPath(hostToVectorTable).Foreign(true))
        .AddInput<TKeyToHostRecord>(
            TRichYPath(keyToHostTable))
        .AddOutput<TExtIdToFeaturesRecord>(TRichYPath(outputTable)
            .Schema(outputSchema));

    client->JoinReduce(spec, new TAppendHostVectorReducer);
}


// Sums the feature vectors of all rows read in one Do call, element by
// element, and writes a single TExtIdToFeaturesRecord with the result.
//
// The features field is decoded with DataCloud::FromString and the sum is
// encoded back with DataCloud::ToString; both helpers are declared outside
// this file (presumably a float-vector <-> string codec - confirm against
// custom_data_utils.h).
//
// The first non-empty decoded vector fixes the output length and the output
// key. Later vectors are added over the common prefix only: elements beyond
// the accumulator's length are ignored, and a shorter (or empty) vector simply
// contributes nothing to the missing positions instead of being read out of
// bounds.
class TUserVectorReducer : public IReducer<
    TTableReader<TExtIdToFeaturesRecord>,
    TTableWriter<TExtIdToFeaturesRecord> >
{
public:
    void Do(TReader* reader, TWriter* writer) override {
        TVector<float> outVector;
        TString key;
        for (; reader->IsValid(); reader->Next()) {
            // Bind by reference: the row is not needed past this iteration.
            const auto& row = reader->GetRow();
            const auto& vector = DataCloud::FromString(row.GetFeatures());
            if (outVector.empty()) {
                // Nothing accumulated yet: this row seeds both key and sum.
                key = row.GetKey();
                outVector = vector;
                continue;
            }
            // Bound by both sizes so a short vector cannot be over-read.
            for (size_t idx = 0; idx < outVector.size() && idx < vector.size(); ++idx) {
                outVector[idx] += vector[idx];
            }
        }
        TExtIdToFeaturesRecord outputRecord;
        outputRecord.SetKey(key);
        outputRecord.SetFeatures(DataCloud::ToString(outVector));
        writer->AddRow(outputRecord);
    }
};
REGISTER_REDUCER(TUserVectorReducer);

void FastUserVectorReducer(
    const TString& ytToken,
    const TString& cluster,
    const TString& inputTable,
    const TString& outputTable
) {
    auto client = NYT::CreateClient(
        cluster, NYT::TCreateClientOptions().Token(ytToken));

    const auto outputSchema = TTableSchema()
        .AddColumn(TColumnSchema().Type(VT_STRING).Name("key"))
        .AddColumn(TColumnSchema().Type(VT_STRING).Name("features"));

    auto spec = TMapReduceOperationSpec()
        .ReduceBy({"key"})
        .AddInput<TExtIdToFeaturesRecord>(inputTable)
        .AddOutput<TExtIdToFeaturesRecord>(TRichYPath(outputTable)
            .Schema(outputSchema));

    client->MapReduce(spec, nullptr, new TUserVectorReducer, new TUserVectorReducer);
}

}
}
}
