#include "lal_processor.h"

#include <crypta/lib/native/proto_serializer/proto_serializer.h>
#include <crypta/lib/native/time/scope_timer.h>
#include <crypta/lib/native/time/shifted_clock.h>
#include <crypta/lib/native/yt/dyntables/async_database/yt_exception.h>
#include <crypta/lookalike/proto/cdp_segment_binding.pb.h>
#include <crypta/lookalike/proto/counter_visit.pb.h>
#include <crypta/lookalike/proto/goal_achievement.pb.h>
#include <crypta/lookalike/proto/metrika_segment_binding.pb.h>
#include <crypta/siberia/bin/common/describing/experiment/proto/describing_experiment.pb.h>
#include <crypta/siberia/bin/common/describing/mode/cpp/describing_mode.h>
#include <crypta/siberia/bin/custom_audience/ca_builder/proto/ca_binding.pb.h>
#include <crypta/siberia/bin/custom_audience/common/proto/ca_rule.pb.h>

#include <library/cpp/protobuf/json/json2proto.h>
#include <mapreduce/yt/interface/common.h>

#include <util/generic/maybe.h>
#include <util/system/backtrace.h>

#include <algorithm>

using namespace NCrypta;
using namespace NCrypta::NLookalike;
using namespace NCrypta::NLookalike::NLalManager;
using namespace NCrypta::NSiberia;

namespace {
    // Builds the JSON -> protobuf conversion options used by this file:
    // a default config with AllowUnknownFields switched off.
    NProtobufJson::TJson2ProtoConfig GetJson2ProtoConfig() {
        NProtobufJson::TJson2ProtoConfig json2ProtoConfig;
        json2ProtoConfig.AllowUnknownFields = false;
        return json2ProtoConfig;
    }

    // Id extraction policy for binding messages that expose GetYandexuid().
    template<typename TProtoBinding>
    struct TYandexuidExtractor {
        using TBinding = TProtoBinding;

        // Converts the binding's yandexuid to a string and wraps it into a TId
        // whose type is "yandexuid".
        static TId GetId(const TBinding& binding) {
            TId result;
            result.SetValue(ToString(binding.GetYandexuid()));
            result.SetType("yandexuid");
            return result;
        }
    };

    // Id extraction policy for binding messages that expose GetCryptaId().
    template<typename TProtoBinding>
    struct TCryptaIdExtractor {
        using TBinding = TProtoBinding;

        // Converts the binding's crypta id to a string and wraps it into a TId
        // whose type is "crypta_id".
        static TId GetId(const TBinding& binding) {
            TId result;
            result.SetValue(ToString(binding.GetCryptaId()));
            result.SetType("crypta_id");
            return result;
        }
    };

    // Values of the SegmentType key used when looking up audience segment rows.
    static const TString LOOKALIKE_OUTPUT = "lookalike_output";
    static const TString AUDIENCE = "audience";
    // Column names of the audience segments table: the two lookup key columns
    // and the column holding the serialized NLab::TUserDataStats.
    static const TString SEGMENT_ID = "SegmentID";
    static const TString SEGMENT_TYPE = "SegmentType";
    static const TString STATS = "Stats";

    // Builds the metric prefix "<base>.parents.<lower-cased parent type name>.".
    // Callers append the concrete metric name to the returned prefix.
    TString GetParentBaseMetric(const TLalParent::EType& type, const TString& base) {
        const auto loweredTypeName = to_lower(TLalParent::EType_Name(type));

        TStringBuilder metric;
        metric << base << ".parents." << loweredTypeName << ".";
        return metric;
    }

    // Per-parent-type prefix for counter metrics.
    TString GetParentBaseCountMetric(const TLalParent::EType& type) {
        const TString countBase = "processors.common.count";
        return GetParentBaseMetric(type, countBase);
    }

    // Per-parent-type prefix for timing metrics.
    TString GetParentBaseTimingsMetric(const TLalParent::EType& type) {
        const TString timingsBase = "processors.common.timings";
        return GetParentBaseMetric(type, timingsBase);
    }

    // Wraps user data stats into a TDescription stamped with the current
    // (shifted clock) time in seconds. `stats` is taken by value so that its
    // contents can be swapped into the result.
    TDescription GetDescription(NLab::TUserDataStats stats) {
        TDescription result;
        result.MutableUserDataStats()->Swap(&stats);
        result.SetTs(TShiftedClock::Now().Seconds());
        return result;
    }

    // Builds the lookup keys for an audience segment: one key per segment type,
    // in the order "audience", "lookalike_output".
    NYT::TNode::TListType GetAudienceSegmentKeys(ui64 id) {
        const auto makeKey = [id](const TString& segmentType) {
            return NYT::TNode()(SEGMENT_ID, id)(SEGMENT_TYPE, segmentType);
        };

        NYT::TNode::TListType keys;
        keys.push_back(makeKey(AUDIENCE));
        keys.push_back(makeKey(LOOKALIKE_OUTPUT));
        return keys;
    }

    // Converts a plain list of crypta ids into TIds, tagging every entry with
    // the "crypta_id" type. The id values are moved out of `cryptaIds`.
    TIds CryptaIdsToIds(TPlainIds&& cryptaIds) {
        TIds converted;
        auto& plainValues = *cryptaIds.MutableIds();
        for (auto& value : plainValues) {
            auto* entry = converted.AddIds();
            entry->SetType("crypta_id");
            entry->SetValue(std::move(value));
        }
        return converted;
    }

    // Returns a copy of the first row whose SegmentType equals "lookalike_output".
    //
    // Throws yexception if no such row exists. The error message includes the
    // segment id only when it can be read safely: the rows may have been fetched
    // without the SegmentID column, and the id may be stored as either a signed
    // or an unsigned integer. Reading it unconditionally would raise a different
    // (lookup/type) error and hide the real reason of the failure.
    NYT::TNode FindLookalikeOutputRow(const NYT::TNode::TListType& rows) {
        for (const auto& row : rows) {
            if (row.At(SEGMENT_TYPE).AsString() == LOOKALIKE_OUTPUT) {
                return row;
            }
        }

        // Best-effort extraction of the segment id for diagnostics only.
        TString segmentId = "<unknown>";
        if (!rows.empty() && rows.front().IsMap() && rows.front().HasKey(SEGMENT_ID)) {
            const auto& idNode = rows.front().At(SEGMENT_ID);
            if (idNode.IsUint64()) {
                segmentId = ToString(idNode.AsUint64());
            } else if (idNode.IsInt64()) {
                segmentId = ToString(idNode.AsInt64());
            }
        }
        ythrow yexception() << "Did not found lookalike_output row for segment with id: " << segmentId;
    }

    // Parses the serialized NLab::TUserDataStats stored in the row's Stats column
    // and returns it as a description together with the segment size, which is
    // taken from the stats' uniq id counter.
    TLalProcessor::TDescriptionWithMeta GetDescriptionWithMeta(const NYT::TNode& row) {
        const auto& serializedStats = row.At(STATS).AsString();
        auto userDataStats = NProtoSerializer::CreateFromString<NLab::TUserDataStats>(serializedStats);

        // The size must be read before the stats are moved into the description.
        const auto uniqIdCount = userDataStats.GetCounts().GetUniqIdValue();

        TLalProcessor::TDescriptionWithMeta result{.Description = GetDescription(std::move(userDataStats)), .Size = uniqIdCount};
        return result;
    }
}

// Creates a processor.
//
// config - processor configuration, taken by value and moved into the Config member.
// name   - processor name, taken by value and moved into the Name member.
// stats  - metrics sink passed by reference (stored in the Stats member).
//
// The redescribe threshold is computed once, at construction time, as
// (now - Config.RedescribeTtl) in seconds.
TLalProcessor::TLalProcessor(TConfig config, TString name, ::TStats& stats)
    : Config(std::move(config))
    // Reads the Config member rather than the moved-from `config` parameter.
    // NOTE(review): relies on Config being declared before RedescribeTsThreshold
    // in the class definition - TODO confirm against the header.
    , RedescribeTsThreshold((TShiftedClock::Now() - Config.RedescribeTtl).Seconds())
    // `name` is a by-value sink parameter: move it instead of copying.
    , Name(std::move(name))
    , Stats(stats)
    , Log(NLog::GetLog("worker"))
{}

// Entry point: loads the current LAL state and delegates to Process(state).
//
// Returns:
//   - whatever Process(state) returns when no exception escapes;
//   - false after one of the recognised errors below (dyntables, YT, Siberia,
//     GRPC, audience segments table, async YT error);
//   - true after any other exception ("critical" error).
// NOTE(review): the meaning of the returned flag for the caller is not visible
// in this file - presumably it tells whether the item is considered handled;
// confirm against the caller.
//
// Every handler logs the error and bumps a dedicated error counter. Handlers are
// matched top to bottom, so the specific exception types must stay above the
// generic std::exception and catch (...) handlers.
bool TLalProcessor::Process() {
    try {
        LogInfo("Start processing. Get current lal state");
        return Process(GetLalState());
    } catch (const NYtDynTables::TYtException& e) {
        LogError("Dyntables error. Error = {}", e.what());
        Stats.Count->Add("processors.common.errors.dyntables");
    } catch (const TStaticYtException& e) {
        LogError("YT error. Error = {}", e.what());
        Stats.Count->Add("processors.common.errors.yt");
    } catch (const NSiberia::TSiberiaClientException& e) {
        LogError("Siberia error. Error = {}", e.what());
        Stats.Count->Add("processors.common.errors.siberia");
    } catch (const NGrpc::TGrpcException& e) {
        const auto status = e.GetStatus();
        LogError("GRPC error. StatusCode = {}. InternalError = {}. Message = '{}'. Details = '{}'", status.GRpcStatusCode, status.InternalError, status.Msg, status.Details);
        Stats.Count->Add("processors.common.errors.grpc");
    } catch (const TAudienceSegmentsYtException& e) {
        LogError("Audience segment table error. Error = {}", e.what());
        Stats.Count->Add("processors.common.errors.audience_segments");
    } catch (const NYT::TErrorException& e) {
        LogError("Async error. Error = {}", e.what());
        Stats.Count->Add("processors.common.errors.async");
    } catch (const std::exception& e) {
        // Anything not recognised above is treated as critical: the backtrace of
        // the in-flight exception is logged as well.
        LogError("Critical error. Error = {}", e.what());
        LogError(TBackTrace::FromCurrentException().PrintToString());
        Stats.Count->Add("processors.common.errors.critical");
        return true;
    } catch (...) {
        // Non-std exceptions carry no message; only the backtrace is available.
        LogError("Critical UNKNOWN error!");
        LogError(TBackTrace::FromCurrentException().PrintToString());
        Stats.Count->Add("processors.common.errors.critical");
        return true;
    }

    // Reached only from the recognised-error handlers above.
    return false;
}

// Looks up the stored state of the current LAL by its id.
//
// Returns Nothing() when no record exists and the deserialized TLalState when
// exactly one record is found. Throws yexception if the lookup yields more than
// one record.
TMaybe<TLalState> TLalProcessor::GetLalState() {
    const TString key = ToString(GetLalId());
    const auto& found = Config.LalDatabase.Lookup({key});

    switch (found.size()) {
        case 0:
            return Nothing();
        case 1:
            return NProtoSerializer::CreateFromString<TLalState>(found.at(0).Value);
        default:
            ythrow yexception() << "More than 1 record for the LAL segment. Id = " << key;
    }
}

void TLalProcessor::RemoveLalState() {
    LogInfo("Remove lal");
    auto tx = Config.LalDatabase.StartTransaction();
    tx->Delete({ToString(GetLalId())});
    tx->Commit();
    Stats.Count->Add("processors.common.count.changes.removed");
}

// Serializes `lalState` and writes it to the LAL database under the current
// LAL id within a single transaction, then counts the update.
void TLalProcessor::WriteLalState(const TLalState& lalState) {
    LogInfo("Write new lal state");

    auto transaction = Config.LalDatabase.StartTransaction();
    auto key = ToString(GetLalId());
    auto serializedState = NProtoSerializer::ToString(lalState);
    transaction->Write({{.Key = std::move(key), .Value = std::move(serializedState)}});
    transaction->Commit();

    // Counted only after a successful commit.
    Stats.Count->Add("processors.common.count.changes.updated");
}

// Reads precomputed stats of an audience segment from the audience segments table.
//
// Returns Nothing() when the table has no rows for the segment. With a single
// row its stats are used; with two rows the "lookalike_output" one is preferred.
// Any yexception raised by the lookup is rethrown as TAudienceSegmentsYtException;
// more than two rows result in a plain yexception.
TMaybe<TLalProcessor::TDescriptionWithMeta> TLalProcessor::GetAudienceSegmentStats(ui64 parentId) {
    TScopeTimer timer(Stats.Percentile, GetParentBaseTimingsMetric(TLalParent::AUDIENCE_SEGMENT) + "get_stats");

    NYT::TNode::TListType foundRows;
    try {
        const auto keys = GetAudienceSegmentKeys(parentId);
        const auto options = NYT::TLookupRowsOptions().Columns({STATS, SEGMENT_TYPE});
        foundRows = Config.YtClient->LookupRows(Config.AudienceSegmentsTablePath, keys, options);
    } catch (const yexception& e) {
        ythrow TAudienceSegmentsYtException() << "Audience segments YT table error: " << e.what();
    }

    switch (foundRows.size()) {
        case 0:
            return Nothing();
        case 1:
            return GetDescriptionWithMeta(foundRows[0]);
        case 2:
            //TODO(terekhinam): remove this after developing good decision for CRYPTA-15457
            return GetDescriptionWithMeta(FindLookalikeOutputRow(foundRows));
        default:
            break;
    }

    //TODO(terekhinam): replace 2 back with 1 after developing good decision for CRYPTA-15457
    ythrow yexception() << "More than 2 records for the audience segment. Id = " << parentId;
}

// Loads the audience of a LAL parent from the table that corresponds to the
// parent type. Each type selects both the source table and the binding/id
// extractor used to read it. Throws yexception for parent types that have no
// audience table mapping here.
TLalProcessor::TParentAudience TLalProcessor::GetParentAudience(const TLalParent& parent) {
    TScopeTimer timer(Stats.Percentile, GetParentBaseTimingsMetric(parent.GetType()) + "get_parent_audience");

    const auto type = parent.GetType();
    const auto& id = parent.GetId();

    switch (type) {
        case TLalParent::GOAL:
            return GetAudience<TYandexuidExtractor<TGoalAchievement>>(id, Config.GoalAudienceTablePath);
        case TLalParent::METRIKA_SEGMENT:
            return GetAudience<TYandexuidExtractor<TMetrikaSegmentBinding>>(id, Config.MetrikaSegmentsTablePath);
        case TLalParent::METRIKA_COUNTER:
            return GetAudience<TYandexuidExtractor<TCounterVisit>>(id, Config.MetrikaCounterAudienceTablePath);
        case TLalParent::METRIKA_ECOMMERCE:
            return GetAudience<TYandexuidExtractor<TMetrikaSegmentBinding>>(id, Config.MetrikaEcommerceTablePath);
        case TLalParent::MOBILE_EVENT:
            return GetAudience<TYandexuidExtractor<TMetrikaSegmentBinding>>(id, Config.MobileEventTablePath);
        case TLalParent::CDP_SEGMENT:
            return GetAudience<TYandexuidExtractor<TCdpSegmentBinding>>(id, Config.CdpSegmentsTablePath);
        case TLalParent::CUSTOM_AUDIENCE:
            // Custom audiences are the only parents keyed by crypta id.
            return GetAudience<TCryptaIdExtractor<NSiberia::NCustomAudience::NCaBuilder::TCaBinding>>(id, Config.CustomAudiencesTablePath);
        default:
            break;
    }

    ythrow yexception() << "Unknown lal parent type: " << TLalParent::EType_Name(type);
}

// Produces a description for a LAL parent, choosing the strategy by parent type:
//   - table-backed parents (and custom audiences in slow describing mode) are
//     described through their audience;
//   - audience segments use stats read from the audience segments table;
//   - custom audiences in fast describing mode are described from their rule.
// Throws yexception when no strategy matches.
//
// Returns Nothing() when no description could be obtained or when the obtained
// stats have a zero total count (the latter is also counted as "empty_stats").
TMaybe<TLalProcessor::TDescriptionWithMeta> TLalProcessor::DescribeParent(const TLalParent& parent) {
    LogInfo("Try to describe parent: {}", parent.DebugString());

    const auto type = parent.GetType();
    TMaybe<TDescriptionWithMeta> described;

    switch (type) {
        case TLalParent::GOAL:
        case TLalParent::METRIKA_SEGMENT:
        case TLalParent::METRIKA_COUNTER:
        case TLalParent::METRIKA_ECOMMERCE:
        case TLalParent::MOBILE_EVENT:
        case TLalParent::CDP_SEGMENT:
            described = DescribeParentWithAudience(parent);
            break;
        case TLalParent::AUDIENCE_SEGMENT:
            described = GetAudienceSegmentStats(parent.GetId());
            break;
        case TLalParent::CUSTOM_AUDIENCE:
            // The slow mode is checked first; the fast mode is consulted only
            // when the slow one does not apply.
            if (NCrypta::NSiberia::IsSlowDescribingMode(Config.DescribingMode)) {
                described = DescribeParentWithAudience(parent);
                break;
            }
            if (NCrypta::NSiberia::IsFastDescribingMode(Config.DescribingMode)) {
                described = DescribeСustomAudience(parent);
                break;
            }
            // Neither mode matched: report it the same way as an unknown type.
            [[fallthrough]];
        default:
            ythrow yexception() << "Unknown lal parent type: " << TLalParent::EType_Name(type);
    }

    if (!described) {
        return Nothing();
    }

    if (described->Description.GetUserDataStats().GetCounts().GetTotal() == 0) {
        LogInfo("Stats count equals to 0");
        Stats.Count->Add(GetParentBaseCountMetric(type) + "empty_stats");
        return Nothing();
    }

    return described;
}

// Loads the parent's audience and describes it.
TMaybe<TLalProcessor::TDescriptionWithMeta> TLalProcessor::DescribeParentWithAudience(const TLalParent& parent) {
    const auto audience = GetParentAudience(parent);
    return DescribeParentAudience(parent.GetType(), audience);
}

// Describes a custom audience from its rule: parses the parent's JSON rule,
// asks the custom audience client for the matching crypta ids, waits for the
// answer and describes the resulting ids.
// NOTE(review): the first letter of "Сustom" in this method's name is a Cyrillic
// 'С', not a Latin 'C'; the spelling has to match the declaration exactly.
TMaybe<TLalProcessor::TDescriptionWithMeta> TLalProcessor::DescribeСustomAudience(const TLalParent& parent) {
    NSiberia::NCustomAudience::TCaRule caRule;
    NProtobufJson::Json2Proto(parent.GetRule(), caRule, GetJson2ProtoConfig());

    auto plainIds = NYT::NConcurrency::WaitFor(Config.CaClient.GetIds(caRule)).ValueOrThrow();
    auto idsToDescribe = CryptaIdsToIds(std::move(plainIds));

    // The size must be captured before the ids are moved into the audience.
    auto size = idsToDescribe.IdsSize();
    const TParentAudience audience{.IdsToDescribe = std::move(idsToDescribe), .Size = size};

    return DescribeParentAudience(parent.GetType(), audience);
}

// Describes an already loaded parent audience.
//
// Logs and records the audience size, then returns Nothing() for an empty
// audience (counted as "empty_audiences"). Otherwise the ids are described and
// the result is returned together with the audience size.
TMaybe<TLalProcessor::TDescriptionWithMeta> TLalProcessor::DescribeParentAudience(const TLalParent::EType& parentType, const TParentAudience& parentAudience) {
    const auto& metricPrefix = GetParentBaseCountMetric(parentType);

    LogInfo("Parent audience size: {}", parentAudience.Size);
    Stats.Percentile->Add(metricPrefix + "parent_audiences", parentAudience.Size);

    const bool isEmptyAudience = (parentAudience.Size == 0);
    if (isEmptyAudience) {
        Stats.Count->Add(metricPrefix + "empty_audiences");
        return Nothing();
    }

    TDescriptionWithMeta described {
        .Description = GetDescription(DescribeIds(parentAudience.IdsToDescribe)),
        .Size = parentAudience.Size
    };
    return described;
}

// Sends the ids to Siberia for describing (with the crypta id user data version
// set to "by_crypta_id") and returns the stats of the resulting user set once
// they are available. The whole call is timed.
// NOTE(review): the timer metric uses the "processors.common.timing" prefix,
// while the per-parent timers in this file use "processors.common.timings" -
// confirm whether the difference is intentional.
NLab::TUserDataStats TLalProcessor::DescribeIds(const TIds& ids) {
    TScopeTimer timer(Stats.Percentile, "processors.common.timing.describe_ids");

    LogInfo("Send describe request to Siberia");

    auto experiment = MakeMaybe<TDescribingExperiment>();
    experiment->SetCryptaIdUserDataVersion("by_crypta_id");

    const auto& siberiaUserSetId = Config.SiberiaClient.DescribeIds(ids, Config.DescribingMode, experiment);
    LogInfo("Siberia user set id = {}", siberiaUserSetId);

    return GetUserSetStats(siberiaUserSetId);
}

// Polls Siberia for the stats of a user set until they are reported as ready
// and returns the user data stats.
//
// Each iteration first waits for Config.StatsCheckInterval and then requests the
// stats, so at least one wait happens before the first request.
// NOTE(review): the loop has no upper bound on the number of attempts; it ends
// only when the stats become ready or the client throws.
NLab::TUserDataStats TLalProcessor::GetUserSetStats(ui64 userSetId) {
    NSiberia::TStats stats;
    while (!stats.GetInfo().GetReady()) {
        LogInfo("Wait for {}", Config.StatsCheckInterval.ToString());
        NYT::NConcurrency::TDelayedExecutor::WaitForDuration(Config.StatsCheckInterval);
        LogInfo("Send get stats request to Siberia");
        stats = Config.SiberiaClient.GetUserSetStats(userSetId);
        LogInfo("Stats readiness: {}", stats.GetInfo().GetReady());
    }
    // `stats` is a local that is destroyed right after the return, so the payload
    // is moved out instead of being deep-copied through the const accessor.
    return std::move(*stats.MutableUserDataStats());
}

// Tells whether the LAL has to be (re)described: either it has no description
// yet, or the existing description is older than the redescribe threshold
// computed at construction time.
bool TLalProcessor::NeedToDescribe(const TLalState& state) const {
    if (!state.HasDescription()) {
        return true;
    }
    return state.GetDescription().GetTs() < RedescribeTsThreshold;
}
