#include <util/string/join.h>

#include <wmconsole/version3/wmcutil/yt/yt_runner.h>
#include <wmconsole/version3/junk/spam_hosts_ml/dataset/predict.h>

int main(int argc, const char** argv) {
    using namespace NWebmaster;
    setenv("YT_POOL", "robot-webmaster", 1);

    NYT::Initialize(argc, argv);
    NYT::IClientPtr client = NYT::CreateClient("banach.yt.yandex.net");

    NYTUtils::DisableLogger();

    TClfPredictor Clf;

    TMap<size_t, TVector<TString>> uidHostnameMap;

    auto reader = client->CreateTableReader<NYT::TNode>("//home/webmaster/prod/export/archive/webmaster-verified-hosts/webmaster-verified-hosts.20170817");
    for (int i = 0; i < Max<int>() && reader->IsValid(); reader->Next(), i++) {
        const size_t uid = FromString<size_t>(reader->GetRow()["user_id"].AsString());
        const TString hostname = reader->GetRow()["host_url"].AsString();
        uidHostnameMap[uid].push_back(hostname);
    }

    for (const auto &obj : uidHostnameMap) {
        const size_t uid = obj.first;
        const TVector<TString> &hostnames = obj.second;
        const TVector<double> probabilites = Clf.Predict(hostnames);
        double avg = 0.0;
        for (size_t i = 0; i < probabilites.size(); i++) {
            avg += probabilites[i];
        }
        avg /= static_cast<double>(probabilites.size());
        Cout << uid << "\t" << avg << "\t" << JoinSeq(";", hostnames) << Endl;
    }

    return 0;
}
