#pragma once

#include <util/generic/map.h>
#include <util/generic/set.h>
#include <util/generic/string.h>
#include <util/generic/vector.h>

#include <library/cpp/containers/comptrie/comptrie.h>

#include <catboost/libs/model/model.h>

#include <wmconsole/version3/wmcutil/string.h>

// Holds lookup tables (TLD map, n-gram map, IDF map, compact trie) and exposes
// helpers that turn a hostname into a vector of float features.
// NOTE(review): presumably feeds a CatBoost host classifier (see the catboost
// include and the command line at the bottom of the file) - confirm against
// the implementation file.
struct TClfDataset {
    TClfDataset();

    // Feature extraction entry points; the definitions live outside this header.
    // NOTE(review): featuresOffset presumably is the index in featuresList at
    // which the computed features start - confirm against the definition.
    void GetFeatures(size_t featuresOffset, TString hostname, TVector<float> &featuresList) const;
    void GetFeatures(const TString &hostname, TVector<float> &featuresList) const;
    void GetFeaturesWithTarget(const TString &hostname, int target, TVector<float> &featuresList) const;

public:
    // Splits hostname on '.' into domains, stores the last label in tld and
    // calls callback once for every pair of adjacent characters (bigram) of
    // every label, passing the pair as a null-terminated char[3].
    //
    // hostname  host to process; the views stored in domains are built from a
    //           TStringBuf over it, so they must not outlive hostname.
    // domains   out: labels as produced by FastSplit.
    // tld       out: copy of the last label; left untouched when false is returned.
    // callback  invoked as callback(const char[3]).
    //
    // Returns false (without invoking callback) when hostname is empty or the
    // split yields no labels, true otherwise.
    template<class TCallback>
    static bool EnumNGramms(const TString &hostname, TVector<TStringBuf> &domains, TString &tld, TCallback &&callback) {
        if (hostname.empty()) {
            return false;
        }

        NWebmaster::NUtils::FastSplit(TStringBuf(hostname), '.', domains);

        // Guard the rbegin() dereference below: nothing to report if the
        // splitter produced no tokens at all.
        if (domains.empty()) {
            return false;
        }

        tld = TString{*domains.rbegin()};

        for (const TStringBuf &domain : domains) {
            // "i + 1 < size" instead of "i < size - 1": size() is unsigned, so
            // an empty label would otherwise wrap around to SIZE_MAX and the
            // loop would read far past the end of the buffer.
            for (size_t i = 0; i + 1 < domain.size(); ++i) {
                const char ngramm[3] = { domain[i], domain[i + 1], 0 };
                callback(ngramm);
            }
        }

        return true;
    }

private:
    // Builders/loaders for the tables below; defined outside this header.
    TMap<TString, size_t> BuildNGrammsMap();
    void Build2ldListTrie();
    bool Is2ldListSubdomain(const TString &hostname) const;
    TMap<TString, size_t> LoadTldMap();
    TMap<TString, double> LoadIdfMap();

public:
    // Immutable after construction (const members).
    // NOTE(review): presumably filled by LoadTldMap / BuildNGrammsMap /
    // LoadIdfMap in the constructor's initializer list - confirm.
    const TMap<TString, size_t> TldMap;
    const TMap<TString, size_t> NGrammsMap;
    const TMap<TString, double> IdfMap;

    // NOTE(review): Trie presumably is built by Build2ldListTrie over the bytes
    // kept in TrieStream, in which case TrieStream must stay alive and
    // unmodified while Trie is in use - confirm.
    TCompactTrie<char> Trie;
    TVector<char> TrieStream;
};

// $ ./catboost fit -f train.06 -t test.06 --verbose --loss-function Logloss --prediction-type Probability -T 16 -i 5000
