#include "common.h"
#include "normalize.h"
#include "user_features_calculator.h"

#include <crypta/lib/native/features_calculator/features_calculator.h>
#include <crypta/lib/proto/user_data/user_data.pb.h>

#include <library/cpp/digest/md5/md5.h>
#include <library/cpp/iterator/functools.h>

#include <util/generic/hash_set.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>
#include <util/string/join.h>

using namespace NCrypta::NLookalike;

// Constructs the calculator by handing the feature mapping over to the
// TFeaturesCalculator base. The mapping is taken by value and moved, so
// callers can pass an rvalue to avoid a copy.
TUserFeaturesCalculator::TUserFeaturesCalculator(NCrypta::TFeaturesMapping featuresMapping)
        : TFeaturesCalculator(std::move(featuresMapping)) {
}

// Builds the float feature string for a user.
//
// The result is a comma-separated list made of two parts, in this order:
//   1. the user's vector data (user.GetVectors().GetVector().GetData())
//      after being passed through Normalize();
//   2. a vector of FeaturesMapping.size() values, initially all zero, filled
//      in through AddFeatureToVector() for gender, age, income, region and
//      every segment of the user.
TString TUserFeaturesCalculator::PrepareFloatFeatures(const NLab::TUserData& user) const {
    // Dense part: copy the stored vector into floats and normalize it.
    const auto& userDataVector = user.GetVectors().GetVector().GetData();
    TVector<float> userSite2vec(userDataVector.begin(), userDataVector.end());
    Normalize(userSite2vec);

    // Mapped part: one slot per entry of FeaturesMapping, zero unless set below.
    TVector<float> userCategoricalFeatures(FeaturesMapping.size(), 0);
    const auto& attributes = user.GetAttributes();

    const auto userGender = attributes.GetGender();
    AddFeatureToVector(userCategoricalFeatures, GetFeatureName("gender", static_cast<float>(userGender)));
    const auto userAge = attributes.GetAge();
    AddFeatureToVector(userCategoricalFeatures, GetFeatureName("age", static_cast<float>(userAge)));
    const auto userIncome = attributes.GetIncome();
    AddFeatureToVector(userCategoricalFeatures, GetFeatureName("income", static_cast<float>(userIncome)));

    // "city_0" is skipped entirely. For any other region, fall back to the
    // "city_other" feature when AddFeatureToVector() reports failure
    // (presumably the region is absent from the mapping -- TODO confirm).
    const auto& regionFeature = GetFeatureName("city", attributes.GetRegion());
    if (regionFeature != "city_0") {
        if (!AddFeatureToVector(userCategoricalFeatures, regionFeature)) {
            AddFeatureToVector(userCategoricalFeatures, "city_other");
        }
    }

    // Segments pass their score explicitly as the third argument.
    // A named cast is used for consistency with the conversions above.
    for (const auto& segment : user.GetSegments().GetSegment()) {
        AddFeatureToVector(
            userCategoricalFeatures,
            GetFeatureName(segment.GetKeyword(), segment.GetID()),
            static_cast<float>(segment.GetScore()));
    }

    return JoinSeq(",", NFuncTools::Concatenate(userSite2vec, userCategoricalFeatures));
}

// Returns the user's site tokens as a single space-separated string.
//
// Tokens are taken from both the affinitive sites and the top common sites of
// user.GetAffinities(). They are collected into a hash set, so each distinct
// token appears once; the output order is the set's iteration order.
TString TUserFeaturesCalculator::PrepareAffinitiveSitesIds(const NLab::TUserData& user) {
    THashSet<TString> uniqueSiteIds;

    // Shared collector: adds the token of every entry in the given list.
    const auto collectTokens = [&uniqueSiteIds](const auto& entries) {
        for (const auto& entry : entries) {
            uniqueSiteIds.insert(entry.GetToken());
        }
    };

    // Affinitive sites are inserted first, then top common sites.
    collectTokens(user.GetAffinities().GetAffinitiveSites().GetToken());
    collectTokens(user.GetAffinities().GetTopCommonSites().GetToken());

    return JoinSeq(" ", uniqueSiteIds);
}

// Returns the user's app tokens as a single space-separated string of hashes.
//
// Each token from user.GetAffinities().GetApps() is replaced by the string
// form of MD5::CalcHalfMix(token). The hashes are collected into a hash set,
// so each distinct value appears once, in the set's iteration order.
TString TUserFeaturesCalculator::PrepareAffinitiveApps(const NLab::TUserData& user) {
    THashSet<TString> hashedApps;

    for (const auto& app : user.GetAffinities().GetApps().GetToken()) {
        const auto appHash = MD5::CalcHalfMix(app.GetToken());
        hashedApps.insert(ToString(appHash));
    }

    return JoinSeq(" ", hashedApps);
}
