#include "make_crypta_id_user_data_reducer.h"

#include <crypta/lab/lib/native/segments_user_data_stats_aggregator.h>
#include <crypta/lab/lib/native/user_data_stats_aggregator.h>
#include <crypta/lib/native/proto_serializer/proto_serializer.h>
#include <crypta/lib/proto/user_data/user_data.pb.h>
#include <crypta/lib/proto/user_data/user_data_stats.pb.h>
#include <crypta/siberia/bin/common/yt_schemas/cpp/schemas.h>

using namespace NCrypta;
using namespace NCrypta::NSiberia::NCryptaIdUserData;

namespace {
    // Deserializes the string held by `value` into `*message` using
    // NProtoSerializer::FromString. A null node is skipped, in which case
    // `*message` is left exactly as it was.
    template <typename TMessage>
    void SetProtoValue(TMessage* message, const NYT::TNode& value) {
        if (value.IsNull()) {
            return;
        }
        const auto& serialized = value.As<TString>();
        NProtoSerializer::FromString(*message, serialized);
    }

    // Fills `userData` from a single table row `node` and returns a reference
    // to the same object.
    //
    // `userData` is cleared before parsing. Every column below is optional
    // (may be null), so without the reset a null column would silently keep
    // whatever was parsed from a previous row when the caller reuses one
    // message object across rows.
    //
    // Columns read:
    //   - "Attributes", "Vectors", "Segments", "Identifiers", "Affinities":
    //     serialized sub-messages, decoded via SetProtoValue;
    //   - "WithoutData" (bool), "GroupID", "CryptaID", "yuid" (strings):
    //     copied into the matching scalar fields.
    //
    // The result is returned by reference rather than by value so that callers
    // that ignore it do not pay for a copy of the whole message.
    NLab::TUserData& ParseUserData(NLab::TUserData& userData, const NYT::TNode& node) {
        // Drop anything left over from an earlier row.
        userData.Clear();

        SetProtoValue(userData.MutableAttributes(), node.At("Attributes"));
        SetProtoValue(userData.MutableVectors(), node.At("Vectors"));
        SetProtoValue(userData.MutableSegments(), node.At("Segments"));
        SetProtoValue(userData.MutableIdentifiers(), node.At("Identifiers"));
        SetProtoValue(userData.MutableAffinities(), node.At("Affinities"));

        if (const auto& withoutData = node.At("WithoutData"); !withoutData.IsNull()) {
            userData.SetWithoutData(withoutData.AsBool());
        }
        if (const auto& groupId = node.At("GroupID"); !groupId.IsNull()) {
            userData.SetGroupID(groupId.AsString());
        }
        if (const auto& cryptaId = node.At("CryptaID"); !cryptaId.IsNull()) {
            userData.SetCryptaID(cryptaId.AsString());
        }
        // Note the column is named "yuid" while the message field is Yandexuid.
        if (const auto& yandexuid = node.At("yuid"); !yandexuid.IsNull()) {
            userData.SetYandexuid(yandexuid.AsString());
        }
        return userData;
    }
}

// Stores the two aggregation settings. Do() passes both values, unchanged, to
// the NLab::TUserDataStatsAggregator it constructs.
TMakeCryptaIdUserDataReducer::TMakeCryptaIdUserDataReducer(int maxTokensCount, double minSampleRatio)
    : MaxTokensCount(maxTokensCount)
    , MinSampleRatio(minSampleRatio)
{}

// Consumes every row available from `reader`, converts each one into an
// NLab::TUserDataStats, folds those into a single aggregate, and writes exactly
// one output row containing the crypta id and the serialized aggregate.
//
// The crypta id written is the one read from the last input row (it stays 0 if
// the reader yields no rows at all).
void TMakeCryptaIdUserDataReducer::Do(TReader* reader, TWriter* writer) {
    NLab::TUserDataStatsAggregator<> merger({.MaxTokensCount = MaxTokensCount, .MinSampleRatio = MinSampleRatio, .AccumulateAffinities = false});
    NNativeYT::TProtoState<NLab::TUserDataStatsOptions> optionsState;
    NLab::TSegmentsUserDataStatsAggregator converter(optionsState);

    ui64 lastCryptaId = 0;
    // The same two message objects are reused for every row.
    // NOTE(review): this relies on ParseUserData and
    // ConvertUserDataToUserDataStats fully overwriting their output — confirm.
    NLab::TUserData rowUserData;
    NLab::TUserDataStats rowStats;

    while (reader->IsValid()) {
        const auto& inputRow = reader->GetRow();
        lastCryptaId = inputRow.At(NFields::CRYPTA_ID).AsUint64();

        // Row -> user data -> per-row stats -> running aggregate.
        ParseUserData(rowUserData, inputRow);
        converter.ConvertUserDataToUserDataStats(rowStats, rowUserData, false);
        merger.UpdateWith(rowStats);

        reader->Next();
    }

    NLab::TUserDataStats mergedStats;
    merger.MergeInto(mergedStats);

    const auto serializedStats = NProtoSerializer::ToString(mergedStats);
    writer->AddRow(
        NYT::TNode()
        (NFields::CRYPTA_ID, lastCryptaId)
        (NFields::STATS, serializedStats)
    );
}

// Registers the reducer class through the REGISTER_REDUCER macro.
// NOTE(review): presumably required so the job runtime can instantiate the
// class by name — confirm against the YT C++ API documentation.
REGISTER_REDUCER(TMakeCryptaIdUserDataReducer);
