#include "describe_ids_processor.h"

#include "indexes.h"
#include "ydb_requests.h"

#include <crypta/lab/lib/native/stats.h>
#include <crypta/lib/native/concurrency/wait_for.h>
#include <crypta/lib/native/identifiers/lib/id_types/all.h>
#include <crypta/lib/native/time/scope_timer.h>
#include <crypta/lib/native/ydb/helpers.h>
#include <crypta/lib/native/ydb/ydb_error_exception.h>
#include <crypta/siberia/bin/common/data/id.h>
#include <crypta/siberia/bin/common/describing/paths/paths.h>
#include <crypta/siberia/bin/common/ydb/tables/user_data_stats_table_fields.h>
#include <crypta/siberia/bin/common/ydb/tables/user_set_stats_fields.h>

#include <util/generic/algorithm.h>

using namespace NCrypta;
using namespace NCrypta::NSiberia;
using namespace NCrypta::NSiberia::NDescriber;

// Creates a processor bound to a single describe-ids command.
//
// `command` is taken by value and moved into the Command member, so the caller may pass either a
// copy or an rvalue. Every other argument is forwarded unchanged to TBase; their exact meaning is
// defined by the base class, which is not visible in this file.
TDescribeIdsProcessor::TDescribeIdsProcessor(
    TDescribeIdsCmd command,
    TInstant commandTimestamp,
    TYdbClient& ydbClient,
    size_t describingBatchSize,
    size_t statsUpdateThreshold,
    ::TStats& stats
)
    : TBase(commandTimestamp, ydbClient, describingBatchSize, statsUpdateThreshold, stats)
    , Command(std::move(command))
{}

// Runs the whole describe-ids command for the user set named in Command.
//
// Steps, in order: call DeleteOldStats for the user set, resolve the table paths, normalize the
// incoming ids into "native crypta ids" and "ordinary ids", describe both groups, then call
// UpdateStats with the number of valid ids.
//
// Returns true on every path, including the "nothing to describe" early exits. Nothing is caught
// here, so exceptions thrown by the helpers propagate to the caller.
bool TDescribeIdsProcessor::ProcessCommand() {
    // Measures the duration of the whole call (reported when `timer` goes out of scope).
    TScopeTimer timer(Stats.Percentile, "describe_ids.timing.batch_processing");

    const auto userSetId = Command.GetUserSetId();

    // Shorthand that attaches the user set id to every info-level message of this run.
    const auto info = [this, userSetId](const char* msg, const auto&... params) {
        LogInfo(userSetId, msg, params...);
    };

    info("Start describing");

    DeleteOldStats(userSetId);

    // Paths are resolved before any counting, exactly once per command.
    const auto userDataTablePath = GetUserDataTablePath(YdbClient, Command.GetExperiment().GetCryptaIdUserDataVersion());
    const auto idToCryptaIdTablePath = GetIdToCryptaIdTablePath(YdbClient);

    const auto receivedCount = Command.IdsSize();
    Stats.Percentile->Add("describe_ids.batch.count.total_ids", receivedCount);
    Stats.Count->Add("describe_ids.count.total_ids", receivedCount);
    info("Batch size: {}", receivedCount);

    if (receivedCount == 0) {
        // An empty command is counted as an error but still publishes stats with zero ids.
        Stats.Count->Add("describe_ids.batch.errors.empty_batch");
        UpdateStats(userSetId, 0, true);
        return true;
    }

    TVector<TIdWithHash> ordinaryIds;
    TVector<ui64> cryptaIds;
    NormalizeIds(ordinaryIds, cryptaIds);

    const auto nativeCount = cryptaIds.size();
    const auto validCount = ordinaryIds.size() + nativeCount;

    Stats.Percentile->Add("describe_ids.batch.count.valid_ids", validCount);
    Stats.Count->Add("describe_ids.count.valid_ids", validCount);
    info("Valid ids: {}", validCount);

    if (validCount == 0) {
        // Every received id was rejected by NormalizeIds.
        Stats.Count->Add("describe_ids.batch.errors.no_valid_id");
        UpdateStats(userSetId, 0, true);
        return true;
    }

    // Native crypta ids go first; the ordinary pass receives their count as the starting offset
    // for its progress reporting.
    DescribeNativeCryptaIds(cryptaIds, userDataTablePath);
    DescribeOrdinaryIds(ordinaryIds, idToCryptaIdTablePath, userDataTablePath, nativeCount);

    UpdateStats(userSetId, validCount, true);
    info("Describing finished");
    return true;
}

// Validates and normalizes every id of Command and distributes the results over two outputs:
//   * cryptaIds   - numeric values of ids whose type is CRYPTA_ID;
//   * ordinaryIds - all remaining valid ids, produced by GetIdWithCityHash from "@<type>" and the
//                   normalized value.
// An id typed IDFA_GAID yields two ordinary entries, one as "idfa" and one as "gaid".
// Ids with an unknown type or an invalid value are skipped; each is counted in Stats and logged.
// On return ordinaryIds is sorted by Hash and cryptaIds is sorted.
void TDescribeIdsProcessor::NormalizeIds(TVector<TIdWithHash>& ordinaryIds, TVector<ui64>& cryptaIds) {
    const auto expectedCount = Command.IdsSize();
    ordinaryIds.reserve(expectedCount);
    cryptaIds.reserve(expectedCount);

    for (const auto& id : Command.GetIds()) {
        const auto& typeName = id.GetType();
        const auto& value = id.GetValue();

        if (!NIdentifiers::IDENTIFIERS_NAME_TO_TYPE.contains(typeName)) {
            Stats.Count->Add("describe_ids.count.ids_with_unknown_type");
            LogError(Command.GetUserSetId(), "Unknown type found: {}", typeName);
            continue;
        }

        NIdentifiers::TGenericID genericId(typeName, value);

        if (!genericId.IsValid()) {
            Stats.Count->Add("describe_ids.count.ids_with_invalid_value");
            LogError(Command.GetUserSetId(), "Invalid id found type={}, value={}", typeName, value);
            continue;
        }

        const auto& normalizedValue = genericId.Normalize();
        const auto idType = genericId.GetType();

        if (idType == NCrypta::NIdentifiersProto::NIdType::CRYPTA_ID) {
            cryptaIds.push_back(FromString<ui64>(normalizedValue));
            continue;
        }

        if (idType == NCrypta::NIdentifiersProto::NIdType::IDFA_GAID) {
            // The raw value is re-interpreted under each concrete type and normalized separately.
            // NOTE(review): the per-type ids are not re-checked with IsValid() - confirm that an id
            // valid as IDFA_GAID always normalizes sensibly under both concrete types.
            for (const auto& concreteType : {"idfa", "gaid"}) {
                NIdentifiers::TGenericID concreteId(concreteType, value);
                ordinaryIds.emplace_back(GetIdWithCityHash(TStringBuilder() << "@" << concreteType, concreteId.Normalize()));
            }
            continue;
        }

        ordinaryIds.emplace_back(GetIdWithCityHash(TStringBuilder() << "@" << typeName, normalizedValue));
    }

    // The two containers are independent, so the order of these sorts does not matter.
    Sort(cryptaIds.begin(), cryptaIds.end());
    SortBy(ordinaryIds, [](const auto& entry) { return entry.Hash; });
}

void TDescribeIdsProcessor::DescribeNativeCryptaIds(const TVector<ui64>& cryptaIds, const TString& userDataTablePath) {
    const auto& userSetId = Command.GetUserSetId();
    TIndexes indexes(DescribingBatchSize, cryptaIds.size());

    if (!indexes.BatchExists()) {
        return;
    }

    auto describeCryptaIdsFuture = ::DescribeCryptaIds(YdbClient, cryptaIds, indexes.StartIndex, indexes.EndIndex, userDataTablePath);

    while (indexes.BatchExists()) {
        TScopeTimer timer(Stats.Percentile, "describe_ids.timing.describe_ids_ydb_request");
        const auto describeCryptaIdsResult = WaitFor(describeCryptaIdsFuture).ValueOrThrow();
        ThrowOnError(describeCryptaIdsResult, AddMessagePrefix(userSetId, "Describe crypta ids error"));

        if (indexes.NextBatchExists()) {
            describeCryptaIdsFuture = ::DescribeCryptaIds(YdbClient, cryptaIds, indexes.EndIndex, indexes.NextEndIndex, userDataTablePath);
        }

        AggregateStats(describeCryptaIdsResult.GetResultSet(0));
        Stats.Count->Add("describe_ids.count.described_ids", indexes.EndIndex - indexes.StartIndex);
        UpdateStatsIfNecessary(userSetId, indexes.EndIndex);

        indexes.Shift();
    }
}

void TDescribeIdsProcessor::DescribeOrdinaryIds(const TVector<TIdWithHash>& ordinaryIds, const TString& idToCryptaIdTablePath, const TString& userDataTablePath, size_t alreadyDescribedIdsCount) {
    const auto& userSetId = Command.GetUserSetId();
    TIndexes indexes(DescribingBatchSize, ordinaryIds.size());

    if (!indexes.BatchExists()) {
        return;
    }

    auto getCryptaIdsFuture = GetCryptaIdsForIds(YdbClient, ordinaryIds, indexes.StartIndex, indexes.EndIndex, idToCryptaIdTablePath);

    while (indexes.BatchExists()) {
        TScopeTimer timer(Stats.Percentile, "describe_ids.timing.describe_ids_ydb_request");
        const auto getCryptaIdsResult = WaitFor(getCryptaIdsFuture).ValueOrThrow();
        ThrowOnError(getCryptaIdsResult, AddMessagePrefix(userSetId, "Get crypta ids error"));

        if (indexes.NextBatchExists()) {
            getCryptaIdsFuture = GetCryptaIdsForIds(YdbClient, ordinaryIds, indexes.EndIndex, indexes.NextEndIndex, idToCryptaIdTablePath);
        }

        DescribeCryptaIds(userSetId, userDataTablePath, getCryptaIdsResult.GetResultSet(0));
        Stats.Count->Add("describe_ids.count.described_ids", indexes.EndIndex - indexes.StartIndex);
        UpdateStatsIfNecessary(userSetId, alreadyDescribedIdsCount + indexes.EndIndex);

        indexes.Shift();
    }
}

// Returns `message` prefixed with the processor name and the user set id, in the form
// "[TDescribeIdsProcessor] [User set id = <id>] <message>".
TString TDescribeIdsProcessor::AddMessagePrefix(TUserSetId userSetId, const TString& message) {
    TStringBuilder prefixed;
    prefixed << "[TDescribeIdsProcessor] [User set id = " << userSetId << "] ";
    prefixed << message;
    return prefixed;
}
