#include "factors_config.h"

#include "factor.h"
#include "factors_domain.h"
#include "rank_model.h"
#include "rty_features_factory.h"

#include <saas/library/daemon_base/daemon/daemon.h>

#include <search/meta/data_registry/data_registry.h>

#include <kernel/externalrelev/sent_zones.h>
#include <kernel/factor_storage/factor_storage.h>
#include <kernel/indexann_data/data.h>
#include <kernel/web_factors_info/factor_names.h>
#include <library/cpp/digest/md5/md5.h>
#include <library/cpp/regex/pcre/regexp.h>
#include <util/stream/file.h>
#include <util/folder/pathsplit.h>
#include <util/folder/path.h>

namespace NRTYFactors {

    namespace {
        // Debug-only check: true when the list is ordered by non-decreasing IndexGlobal.
        [[maybe_unused]]
        bool DbgIsSorted(const TFactorsList& list) {
            const auto byGlobalIndex = [](const TFactor& lhs, const TFactor& rhs) {
                return lhs.IndexGlobal < rhs.IndexGlobal;
            };
            return std::is_sorted(list.cbegin(), list.cend(), byGlobalIndex);
        }

        // Returns IndexGlobal of the last element of a list that is expected to be sorted
        // by IndexGlobal (asserted in debug builds), or 0 for an empty list.
        inline ui32 MaxIndex(const TFactorsList& list) {
            Y_ASSERT(DbgIsSorted(list));
            if (list.empty()) {
                return 0;
            }
            return list.back().IndexGlobal;
        }

        // Returns the largest MaxIndex() over the factor lists held by the values of a
        // name -> list-holder map, or 0 when the map is empty.
        template<class TFactorsHashList>
        inline ui32 MaxIndex(const TFactorsHashList& list) {
            ui32 best = 0;
            for (const auto& entry : list) {
                const ui32 candidate = MaxIndex(entry.second->GetFactorsList());
                if (best < candidate) {
                    best = candidate;
                }
            }
            return best;
        }
    }

    namespace NTraits {
        // Trait id looked up in DynFactorsConfigTraits by TImpl::NeedDocsLens().
        constexpr ui32 DocsLens = 1;
    }

    // Private implementation of TConfig: parses the factors config (JSON, comments allowed)
    // and keeps the resulting factor lists, streams, lookup tables, factor sets and rank models.
    class TConfig::TImpl {
    public:
        // Only stores the arguments; nothing is read from disk until Init() is called.
        TImpl(TConfig& parent, const char* filename, i32 configVersion)
            : Parent(parent)
            , FileName(filename)
            , Initialized(false)
            , DefaultRankModel(nullptr)
            , ConfigVersion(configVersion)
        {
        }

        // Reads the config file (through configPreprocessor when given, otherwise through a
        // default-constructed TConfigPreprocessor), parses it and fills all factor lists,
        // streams, geo layers, factor sets and rank models.
        // With an empty file name only the factors domain is created and Initialized stays false.
        // Throws yexception when the file does not exist, is not valid JSON, its root is not a map,
        // or zone_factors use more zones than TSentenceZones has bits; the Load* helpers may
        // report further errors.
        void Init(TStringBuf factorsModels, TConfigPreprocessor *configPreprocessor) {
            if (!FileName) {
                CreateFactorsDomain(nullptr);
                return;
            }

            if (!TFsPath(FileName).Exists()) {
                ythrow yexception() << "Incorrect file name factors config " << FileName;
            }
            {
                TConfigPreprocessor dummy;
                TConfigPreprocessor *preprocessor = configPreprocessor ? configPreprocessor : &dummy;
                const TString configText(preprocessor->ReadAndProcess(FileName));
                TStringInput si(configText);
                NJson::TJsonReaderConfig readerCfg;
                readerCfg.AllowComments = true;
                if(!NJson::ReadJsonTree(&si, &readerCfg, &JsonConfig))
                    ythrow yexception() << "invalid config file " << FileName;
            }
            if (JsonConfig.GetType() != NJson::JSON_MAP)
                ythrow yexception() << "invalid config file " << FileName;

            CreateFactorsDomain(&JsonConfig);
            LoadRtyFeatures(JsonConfig, factorsModels);
            // Section names are regular expressions (LoadFactors compiles them with TRegExMatch);
            // the '^' anchor presumably keeps "dynamic_factors" from also matching
            // "rty_dynamic_factors" - confirm against TRegExMatch::Match semantics.
            LoadFactors(ZoneFactors, JsonConfig, "zone_factors", ftZone);
            LoadFactors(StatFactors, JsonConfig, "static_factors", ftStatic);
            LoadFactors(IgnoreFactors, JsonConfig, "ignored_factors", ftStatic);
            LoadFactors(DynFactors, JsonConfig, "^dynamic_factors", ftDynamic);
            LoadFactors(RTYDynFactors, JsonConfig, "rty_dynamic_factors", ftRtyDynamic);
            LoadFactors(UsrFactors, JsonConfig, "user_factors", ftUser);
            LoadFactors(SuggestFactors, JsonConfig, "suggest_factors", ftSuggest);
            LoadFactors(SuggestDictFactors, JsonConfig, "suggest_dict_factors", ftSuggestDict);
            LoadHashFactors<TQSFactorsHashList, TQSFactorsList>(QuerySpecFactors, JsonConfig, "QS_.*", ftQS, "QS_");
            LoadHashFactors<TCSFactorsHashList, TCSFactorsList>(CommonStatFactors, JsonConfig, "CS_.*", ftCommonStatic, "CS_");
            LoadHashFactors<TAnnFactorsHashList, TAnnFactorsList>(AnnFactors, JsonConfig, "ann_factors_.*", ftAnn, "ann_factors_");
            LoadImports(JsonConfig);
            LoadStreams(JsonConfig);
            LoadGeo(JsonConfig);

            // The number of distinct zones is limited by the bit width of TSentenceZones.
            THashSet<TString> usedZones;
            for (const auto& zoneFactor : ZoneFactors)
                usedZones.insert(zoneFactor.ZoneName);
            const size_t maxZones = sizeof(TSentenceZones) * 8;
            if (usedZones.size() > maxZones)
                ythrow yexception() << "Too many zones with zone_factors. maximum zones count is " << maxZones;
            LoadFactorSets(JsonConfig);
            LoadRankModels(JsonConfig);

            Initialized = true;
        }

        // True when at least one zone factor refers to the zone called `name` (must be non-null).
        bool IsZoneUsed(const char* name) const {
            for (const auto& zoneFactor : ZoneFactors) {
                if (strcmp(zoneFactor.ZoneName.data(), name) == 0)
                    return true;
            }
            return false;
        }

        // Plain accessors for the factor lists filled by Init().
        const TFactorsList& GetZoneFactors() const {
            return ZoneFactors;
        }

        const TFactorsList& StaticFactors() const {
            return StatFactors;
        }

        const TFactorsList& IgnoredFactors() const {
            return IgnoreFactors;
        }

        const TFactorsList& DynamicFactors() const {
            return DynFactors;
        }

        const TFactorsList& UserFactors() const {
            return UsrFactors;
        }

        const TFactorsList& RtyDynamicFactors() const {
            return RTYDynFactors;
        }

        const TFactorsList& GetSuggestFactors() const {
            return SuggestFactors;
        }

        const TFactorsList& GetSuggestDictFactors() const {
            return SuggestDictFactors;
        }

        const TQSFactorsHashList& GetQuerySpecFactors() const {
            return QuerySpecFactors;
        }

        // Returns the query-spec factor list stored under fName, or nullptr when there is none.
        const TQSFactorsList::TPtr GetQuerySpecFactors(const TString& fName) const {
            TQSFactorsHashList::const_iterator i = QuerySpecFactors.find(fName);
            return i == QuerySpecFactors.end() ? nullptr : i->second;
        }

        const TCSFactorsHashList& GetCommonStatFactors() const {
            return CommonStatFactors;
        }

        // Returns the common-static factor list stored under fName, or nullptr when there is none.
        const TCSFactorsList::TPtr GetCommonStatFactors(const TString& fName) const {
            TCSFactorsHashList::const_iterator i = CommonStatFactors.find(fName);
            return i == CommonStatFactors.end() ? nullptr : i->second;
        }

        const TAnnFactorsHashList& GetAnnStreams() const {
            return AnnFactors;
        }

        // Returns the annotation stream list stored under fName, or nullptr when there is none.
        const TAnnFactorsList::TPtr GetAnnStreams(const TString& fName) const {
            TAnnFactorsHashList::const_iterator i = AnnFactors.find(fName);
            return i == AnnFactors.end() ? nullptr : i->second;
        }

        const TGeoLayersList& GetGeoLayers() const {
            return GeoLayers;
        }

        // Largest key of FactorsByIndexInFormula plus one, or 0 when the map is empty.
        size_t GetFactorCount() const {
            if (FactorsByIndexInFormula.empty())
                return 0;
            return FactorsByIndexInFormula.rbegin()->first + 1;
        }

        // Returns the factor registered under `name`, or nullptr when it is unknown.
        const TFactor* GetFactorByName(TStringBuf name) const {
            TMap<TString, TFactor>::const_iterator i = FactorsByName.find(name);
            return i == FactorsByName.end() ? nullptr : &i->second;
        }

        // Returns the name of the factor with the given formula index, or "" when there is none.
        const char* GetFactorName(size_t index) const {
            TMap<ui32, TFactor>::const_iterator i = FactorsByIndexInFormula.find(index);
            if (i == FactorsByIndexInFormula.end())
                return "";
            return i->second.Name.data();
        }

        // Writes GetFactorGlobalNum(name) into *index (index must be non-null);
        // returns false when that value is NOT_FACTOR.
        bool GetFactorIndex(const char* name, size_t* index) const {
            *index = GetFactorGlobalNum(name);
            return *index != NRTYFactors::NOT_FACTOR;
        }

        // True when no factor is registered under this formula index.
        bool IsUnusedFactor(size_t index) const {
            return FactorsByIndexInFormula.find(index) == FactorsByIndexInFormula.end();
        }

        // Returns the factor with the given formula index. For a missing index the call fails
        // via Y_ENSURE when checkFactorExists is true and returns nullptr otherwise.
        const TFactor* GetFactorByFormulaIndex(size_t indexFormula, bool checkFactorExists = true) const {
            TMap<ui32, TFactor>::const_iterator i = FactorsByIndexInFormula.find(indexFormula);
            if (i == FactorsByIndexInFormula.end()) {
                Y_ENSURE(!checkFactorExists, "Incorrect factor index " << indexFormula << " in formula");
                return nullptr;
            }
            return &i->second;
        }

        const TFactorsList* GetFactorsByRtyType(const TString& userInput) const {
            // userInput is a user-specified string from relev.conf
            // This resolver is called when the user wants to include all user factors, all static factors etc. in a factor_set
            // Not all groups are available here (users should use the "range" syntax if they want everything, or &relev=all_factors)
            if (userInput == "user_factors")
                return &UserFactors();
            if (userInput == "static_factors")
                return &StaticFactors();
            return nullptr;
        }

        // Returns the factors whose formula index lies in [beginGlobalIndex, endGlobalIndex),
        // in increasing index order. An inverted range yields an empty result.
        TVector<const TFactor*> ListFactorsByRange(size_t beginGlobalIndex, size_t endGlobalIndex) const {
            if (endGlobalIndex < beginGlobalIndex) {
                endGlobalIndex = beginGlobalIndex;
            }

            TVector<const TFactor*> result;
            const auto begin = FactorsByIndexInFormula.lower_bound(beginGlobalIndex);
            const auto end = FactorsByIndexInFormula.lower_bound(endGlobalIndex);
            for (auto i = begin; i != end; ++i) {
                result.push_back(&i->second);
            }
            return result;
        }

        // Position stored in NameToPosition for `name`, or NOT_FACTOR when absent.
        size_t GetFactorNum(const TString& name) const {
            TNameToPosition::const_iterator i = NameToPosition.find(name);
            return i != NameToPosition.end() ? i->second : NOT_FACTOR;
        }

        // Same as the TStringBuf overload; when length is npos the name is taken up to its terminator.
        size_t GetFactorGlobalNum(const char* name, size_t length = TString::npos) const {
            const size_t len = (length == TString::npos) ? TString::StrLen(name) : length;
            return GetFactorGlobalNum(TStringBuf(name, len));
        }

        // Position stored in NameToPositionGlobal for `name`, or NOT_FACTOR when absent.
        size_t GetFactorGlobalNum(TStringBuf name) const {
            TNameToPosition::const_iterator i = NameToPositionGlobal.find(name);
            return i != NameToPositionGlobal.end() ? i->second : NOT_FACTOR;
        }

        // True when `name` was registered by one of the RTY features (see LoadRtyFeatures).
        bool IsDpFactor(TStringBuf name) const {
            return DpFactors.contains(name);
        }

        // MD5 (as returned by MD5::End) over the names of the static factors, in list order.
        TString GetKey() const {
            MD5 result;
            for (TFactorsList::const_iterator i = StatFactors.begin(); i != StatFactors.end(); ++i)
                result.Update(i->Name.data(), i->Name.size());
            char buf[33];
            return result.End(buf);
        }

        // Returns the factor set called `name`, or nullptr when there is none.
        const TFactorSet* GetFactorSet(const TString& name) const {
            auto i = FactorSets.find(name);
            if (i == FactorSets.end())
                return nullptr;
            Y_ASSERT(i->second);
            Y_ASSERT(i->second->GetName() == name);
            return i->second.Get();
        }

        // Returns false when no stream is called `name`; otherwise stores its IndexGlobal
        // into *index (when index is non-null) and returns true.
        bool GetStreamIndex(const TString& name, size_t* index) const {
            TMap<TString, TFactor>::const_iterator i = StreamsByName.find(name);
            if (i == StreamsByName.end())
                return false;
            if (index)
                *index = i->second.IndexGlobal;
            return true;
        }

        const TUserFunctionsList& GetUserFunctions() const {
            return UserFunctions;
        }

        // Rank model lookups; each one delegates to a private resolver declared below.
        const TRankModelHolder* GetRankModel(const TStringBuf name, const TStringBuf baseRankModel = {}, const TStringBuf polynom = {}) const {
            return GetRankModel(name, DefaultRankModel, baseRankModel, polynom);
        }

        const TRankModelHolder* GetFastRankModel(const TStringBuf name, const TStringBuf polynom = {}) const {
            return GetTypedRankModel(name, "fast_rank", polynom);
        }

        const TRankModelHolder* GetFilterModel(const TStringBuf name, const TStringBuf polynom = {}) const {
            return GetTypedRankModel(name, "filter_rank", polynom);
        }

        const TRankModelHolder* GetFastFilterModel(const TStringBuf name, const TStringBuf polynom = {}) const {
            return GetTypedRankModel(name, "fast_filter_rank", polynom);
        }

        const TRankModelHolder* GetL3RankModel(const TStringBuf name) const {
            return GetTypedRankModel(name, "l3_rank");
        }

        // Returns a copy of the rank model map taken under the read lock.
        TRankModels GetRankModels() const {
            TReadGuard g(MutexRankModelGen);
            return RankModels;
        }

        TFactorModels& GetFactorModels() const {
            return FactorModels;
        }

        // True when a rank model called `name` is present (checked under the read lock).
        bool HasRankModel(const TString& name) const {
            TReadGuard g(MutexRankModelGen);
            return RankModels.find(name) != RankModels.end();
        }

        // Returns 1 + the largest global index among the listed factor groups and the fictive
        // polynom indexes reported by Parent; the matrixnet indexes are included only when
        // withMn is true. The result is never less than 1.
        ui32 GetFactorsCountByIndex(bool withMn = true) const {
            ui32 count = 1;
            // Grows count to cover globalIndex. Indexes equal to Max<ui32>() are skipped
            // (presumably "not assigned" - confirm with the Parent getters).
            const auto coverFictiveIndex = [&count](const auto globalIndex) {
                if (globalIndex < Max<ui32>() && count < 1 + globalIndex)
                    count = globalIndex + 1;
            };

            count = Max(count, 1 + MaxIndex(DynamicFactors()));
            count = Max(count, 1 + MaxIndex(StaticFactors()));
            count = Max(count, 1 + MaxIndex(UserFactors()));
            count = Max(count, 1 + MaxIndex(GetZoneFactors()));
            count = Max(count, 1 + MaxIndex(RtyDynamicFactors()));
            count = Max(count, 1 + MaxIndex(GetSuggestFactors()));
            count = Max(count, 1 + MaxIndex(GetSuggestDictFactors()));
            count = Max(count, 1 + MaxIndex<TQSFactorsHashList>(GetQuerySpecFactors()));
            count = Max(count, 1 + MaxIndex<TCSFactorsHashList>(GetCommonStatFactors()));
            if (withMn) {
                coverFictiveIndex(Parent.GetMatrixNetGlobalIndex());
                coverFictiveIndex(Parent.GetMetaMatrixNetGlobalIndex());
                coverFictiveIndex(Parent.GetFullMatrixNetGlobalIndex());
                coverFictiveIndex(Parent.GetFastMatrixNetGlobalIndex());
                coverFictiveIndex(Parent.GetFilterMatrixNetGlobalIndex());
                coverFictiveIndex(Parent.GetFastFilterMatrixNetGlobalIndex());
            }
            coverFictiveIndex(Parent.GetMetaPolynomGlobalIndex());
            coverFictiveIndex(Parent.GetFullPolynomGlobalIndex());
            coverFictiveIndex(Parent.GetFastPolynomGlobalIndex());
            coverFictiveIndex(Parent.GetFilterPolynomGlobalIndex());
            coverFictiveIndex(Parent.GetFastFilterPolynomGlobalIndex());
            return count;
        }

        const TVector<NRTYFeatures::IFeature::TPtr>& GetRTYFeatures() const {
            return RTYFeatures;
        }

        // Builds a JSON map keyed by rank model name. Each entry maps the index of every named
        // factor the model uses to the factor name; named factors the model does not use are
        // collected under the "unused" key (omitted when there are none).
        NJson::TJsonValue GetUnusedFactorsReport() {
            NJson::TJsonValue report;
            NRTYFactors::TConfig::TRankModels rankModels = GetRankModels();
            for (NRTYFactors::TConfig::TRankModels::const_iterator model = rankModels.begin(); model != rankModels.end(); ++model) {
                NJson::TJsonValue& jsonRankModel = report.InsertValue(model->first, NJson::JSON_MAP);
                NJson::TJsonValue unused;
                const auto& usedFactors = model->second->GetUsedFactors();
                for (size_t i = 0; i < GetFactorCount(); ++i) {
                    const char* name = GetFactorName(i);
                    if (name && *name) {
                        if (usedFactors.find(i) != usedFactors.end())
                            jsonRankModel.InsertValue(ToString(i), name);
                        else
                            unused.InsertValue(ToString(i), name);
                    }
                }
                if (!unused.GetMap().empty())
                    jsonRankModel.InsertValue("unused", unused);
            }
            return report;
        }

        // Inserts into `factors` every formula index that has a factor with a non-empty name.
        void GetAllFactorsIndexes(TSet<ui32>& factors) const {
            for (size_t i = 0; i < GetFactorCount(); ++i) {
                const char* name = GetFactorName(i);
                if (name && *name) {
                    factors.insert(i);
                }
            }
        }

        const NJson::TJsonValue& GetJsonConfig() const {
            return JsonConfig;
        }

        const TString GetFileName() const {
            return FileName;
        }

        // Resolves a model path taken from the config: an empty value gives an empty path;
        // a relative value that does not start with "./" or "../" is resolved against the
        // directory of the config file; everything else is returned as is.
        const TFsPath GetModelPath(const TString& value) const {
            if (Y_UNLIKELY(value.empty())) {
                return TFsPath();
            }

            //this is a special convention about paths in standalone_indexer config (see r4378299)
            auto startsWithDots = [](const TString& optionValue) {
                // we cannot use TFsPath::PathSplit() here, because it removes "." and ".."
                return optionValue.StartsWith("./") || optionValue.StartsWith("../");
            };
            TFsPath path(value);
            if (path.IsRelative() && !startsWithDots(value)) {
                path = TFsPath(GetFileName()).Parent() / path;
            }
            return path;
        }

        // True once Init() has completed for a non-empty file name.
        bool IsInitialized() const {
            return Initialized;
        }

        bool NeedDocsLens() const {
            return DynFactorsConfigTraits.contains(NTraits::DocsLens);
        }

        // True when every non-fictive factor in `factors` has type ftStatic.
        // Unknown factor indexes make GetFactorByFormulaIndex fail via Y_ENSURE.
        bool IsStaticOnly(const TUsedFactors& factors) const {
            for (auto&& f : factors) {
                if (Parent.IsFictiveFactor(f)) {
                    continue;
                }

                const TFactor& factor = *GetFactorByFormulaIndex(f);
                if (factor.FactorType != ftStatic) {
                    return false;
                }
            }
            return true;
        }

        const TBaseDomain* GetBaseDomain() const {
            return Domain.Get();
        }

        // Requires the domain to have been created (checked with CHECK_WITH_LOG).
        const NFactorSlices::TFactorDomain& GetFactorsDomain() const {
            CHECK_WITH_LOG(Domain);
            return Domain->GetDomain();
        }

        i32 GetVersion() const {
            return ConfigVersion;
        }

    private:
        typedef THashMap<TString, size_t> TNameToPosition;
    private:
        void LoadFactors(IFactorsInfoWriter& factors, const NJson::TJsonValue& config, const char* sectionName, TFactorType factorsType);
        void LoadFactors(TFactorsList& factors, const NJson::TJsonValue& config, const char* sectionName, TFactorType factorsType);
        // Loads the sections matching sectionName into a hash of factor lists through a
        // TFactorsHashListWriter constructed with the given prefix.
        template <class TFactorsHashList, class TFactorsData>
        void LoadHashFactors(TFactorsHashList& factors, const NJson::TJsonValue& config, const char* sectionName, TFactorType factorsType, const TString& prefix) {
            TFactorsHashListWriter<TFactorsHashList, TFactorsData> flw(factors, prefix);
            LoadFactors(flw, config, sectionName, factorsType);
        }
        void LoadRtyFeatures(const NJson::TJsonValue& config, TStringBuf factorsModels);
        void LoadFactorSets(const NJson::TJsonValue& config);
        void LoadStreams(const NJson::TJsonValue& config);
        void LoadImports(const NJson::TJsonValue& config);
        void LoadGeo(const NJson::TJsonValue& config);

        void CreateFactorsDomain(const NJson::TJsonValue* config);

        TSimpleFactorDescription ReadFactor(const NJson::TJsonValue::TMapType::const_iterator& i) const;

        void LoadRankModels(const NJson::TJsonValue& config);
        const TRankModelHolder* GetRankModel(const TStringBuf name, const TRankModelHolder* def, const TStringBuf baseRankModel = {}, const TStringBuf polynom = {}) const;
        const TRankModelHolder* GetCustomRankModel(const TStringBuf formula, const TRankModelHolder* def, const TStringBuf model) const;
        const TRankModelHolder* GetTypedRankModel(const TStringBuf name, const TStringBuf modelTypeName, const TStringBuf polynom = {}) const;
    private:
        TConfig& Parent;
        const TString FileName;
        bool Initialized;
        NJson::TJsonValue JsonConfig;

        TFactorsList ZoneFactors;
        TFactorsList StatFactors;
        TFactorsList IgnoreFactors;

        TFactorsList DynFactors;
        TFactorsList RTYDynFactors;
        TFactorsList UsrFactors;
        TFactorsList SuggestFactors;
        TFactorsList SuggestDictFactors;
        TQSFactorsHashList QuerySpecFactors;
        TCSFactorsHashList CommonStatFactors;
        TAnnFactorsHashList AnnFactors;
        TGeoLayersList GeoLayers;
        TUserFunctionsList UserFunctions;
        TString Formula;
        TMap<ui32, TFactor> FactorsByIndexInFormula;
        TMap<TString, TFactor> FactorsByName;
        TMap<TString, TFactor> StreamsByName;
        TNameToPosition NameToPosition;
        TNameToPosition NameToPositionGlobal;
        mutable TRankModels RankModels;
        mutable TFactorModels FactorModels;
        const TRankModelHolder* DefaultRankModel;
        THashMap<TString, THolder<TFactorSet>> FactorSets;
        TSet<ui32> DynFactorsConfigTraits;

        TRWMutex MutexRankModelGen;
        THashSet<TString> DpFactors;
        THolder<TBaseDomain> Domain;
        TSet<ui32> AllIndices;

        TVector<NRTYFeatures::IFeature::TPtr> RTYFeatures;
        i32 ConfigVersion {-1};
        TVector<ui32> FictiveFactorsGlobalIndex;
    };

    // Returns the name of the factor with the given formula index, or "" when there is none
    // (forwards to TImpl).
    const char* TConfig::GetFactorName(size_t index) const {
        return Impl->GetFactorName(index);
    }

    // True when no factor is registered under this formula index (forwards to TImpl).
    bool TConfig::IsUnusedFactor(size_t index) const {
        return Impl->IsUnusedFactor(index);
    }

    // Stores the global number of the factor called `name` into *index (index must be non-null);
    // returns false when the stored value is NOT_FACTOR (forwards to TImpl).
    bool TConfig::GetFactorIndex(const char* name, size_t* index) const {
        return Impl->GetFactorIndex(name, index);
    }

    // Returns the factor registered under `name`, or nullptr when it is unknown (forwards to TImpl).
    const TFactor* TConfig::GetFactorByName(TStringBuf name) const {
        return Impl->GetFactorByName(name);
    }

    // Returns the factor with the given formula index. For a missing index the call fails via
    // Y_ENSURE when checkFactorExists is true and returns nullptr otherwise (forwards to TImpl).
    const TFactor* TConfig::GetFactorByFormulaIndex(size_t indexFormula, bool checkFactorExists) const {
        return Impl->GetFactorByFormulaIndex(indexFormula, checkFactorExists);
    }

    // Returns the factors whose formula index lies in [beginGlobalIndex, endGlobalIndex);
    // an inverted range yields an empty result (forwards to TImpl).
    TVector<const TFactor*> TConfig::ListFactorsByRange(size_t beginGlobalIndex, size_t endGlobalIndex) const {
        return Impl->ListFactorsByRange(beginGlobalIndex, endGlobalIndex);
    }

    // Returns false when no stream is called `name`; otherwise stores its IndexGlobal into
    // *index (when index is non-null) and returns true (forwards to TImpl).
    bool TConfig::GetStreamIndex(const TString& name, size_t* index) const {
        return Impl->GetStreamIndex(name, index);
    }

    // Returns the factor set called `name`, or nullptr when there is none (forwards to TImpl).
    const TFactorSet* TConfig::GetFactorSet(const TString& name) const {
        return Impl->GetFactorSet(name);
    }

    // Convenience overload for plain factor lists: wraps `factors` into a TFactorsListWriter
    // and hands the work over to the IFactorsInfoWriter-based LoadFactors.
    void TConfig::TImpl::LoadFactors(
        TFactorsList& factors,
        const NJson::TJsonValue& config,
        const char* sectionName,
        TFactorType factorsType
    ) {
        TFactorsListWriter listWriter(factors);
        LoadFactors(listWriter, config, sectionName, factorsType);
    }

    // Constructs every registered RTY feature, lets each one register its factor names in
    // DpFactors, and keeps in RTYFeatures only the enabled ones.
    // When the config has no "enabled_rty_features" key all registered features are enabled;
    // otherwise only the listed ones are. Aborts (Y_VERIFY) when a listed feature is not
    // registered or when a factor name is reported twice.
    void TConfig::TImpl::LoadRtyFeatures(const NJson::TJsonValue& config, TStringBuf factorsModels) {
        TSet<TString> registeredFeatures;
        TRTYFeaturesFactory::GetRegisteredKeys(registeredFeatures);

        const auto& rootMap = config.GetMap();
        const auto whitelistIt = rootMap.find("enabled_rty_features");
        const bool hasWhitelist = whitelistIt != rootMap.end();

        THashSet<TString> whitelist;
        if (hasWhitelist) {
            for (const auto& item : whitelistIt->second.GetArray()) {
                const TString& featureName = item.GetString();
                const bool featureExists = registeredFeatures.contains(featureName);
                Y_VERIFY(featureExists);

                whitelist.insert(featureName);
            }
        }

        auto addFactor = [this](const NRTYFeatures::TDynFactorInfo& factorInfo) {
            const bool inserted = DpFactors.insert(factorInfo.Name).second;
            Y_VERIFY(inserted);
        };

        for (const auto& featureName : registeredFeatures) {
            // Factors are always registered for every feature, even for a disabled one.
            NRTYFeatures::IFeature::TPtr feature { TRTYFeaturesFactory::Construct(featureName) };
            feature->InitStaticInfo(factorsModels, addFactor);

            if (!hasWhitelist || whitelist.contains(featureName)) {
                RTYFeatures.push_back(feature);
            }
        }
    }

    // True when `name` is one of the factor names registered by the RTY features (forwards to TImpl).
    bool TConfig::IsDpFactor(TStringBuf name) const {
        return Impl->IsDpFactor(name);
    }

    // Parses one "name": <description> entry of a factors section.
    // The description is either an unsigned integer (the global index) or a map with the
    // optional fields "type" ("f" or "i"), "index", "width", "default_value" and "tags"
    // ("tags" is accepted and ignored). The result starts from the values passed to the
    // TSimpleFactorDescription constructor here (-1, 32, ftFloat).
    // Throws yexception on an unknown field, a field of the wrong JSON type, or a description
    // that is neither an unsigned integer nor a map.
    TSimpleFactorDescription TConfig::TImpl::ReadFactor(const NJson::TJsonValue::TMapType::const_iterator& i) const {
        const NJson::TJsonValue& description = i->second;
        TSimpleFactorDescription result(i->first, -1, 32, TBitsReader::ftFloat);

        if (description.IsUInteger()) {
            result.IndexGlobal = description.GetUIntegerRobust();
            return result;
        }
        if (description.GetType() != NJson::JSON_MAP)
            ythrow yexception() << "Incorrect factors config field: " << i->first;

        // Iterate the stored map by reference; there is no need to copy it just to read it.
        for (const auto& [fieldName, fieldValue] : description.GetMap()) {
            if (fieldName == "type") {
                TString type;
                if (!fieldValue.GetString(&type))
                    ythrow yexception() << "Incorrect \"type\" field type: " << i->first;
                if (type == "f")
                    result.Type = TBitsReader::ftFloat;
                else if (type == "i")
                    result.Type = TBitsReader::ftInt;
                else
                    ythrow yexception() << "Incorrect \"type\" field value: " << i->first;
            } else if (fieldName == "index") {
                if (!fieldValue.IsUInteger())
                    ythrow yexception() << "Incorrect \"IndexGlobal\" field type: " << i->first;
                result.IndexGlobal = fieldValue.GetUIntegerRobust();
            } else if (fieldName == "width") {
                if (!fieldValue.IsUInteger())
                    ythrow yexception() << "Incorrect \"Width\" field type: " << i->first;
                result.Width = fieldValue.GetUIntegerRobust();
            } else if (fieldName == "default_value") {
                if (fieldValue.IsUInteger())
                    result.DefaultValue = fieldValue.GetUIntegerRobust();
                else if (fieldValue.IsDouble())
                    result.DefaultValue = fieldValue.GetDoubleRobust();
                else
                    ythrow yexception() << "Incorrect \"default_value\" field type: " << i->first;
            } else if (fieldName == "tags") {
                // can be used to work on quality (select features by tags, deprecate features, etc.)
            } else {
                ythrow yexception() << "Undefined field " << fieldName << " for " << i->first;
            }
        }
        return result;
    }

    // Copies the "user_functions" section (function name -> component name string)
    // into UserFunctions; existing entries with the same name are overwritten.
    void TConfig::TImpl::LoadImports(const NJson::TJsonValue& config) {
        const auto& functions = config["user_functions"].GetMap();
        for (auto it = functions.begin(); it != functions.end(); ++it) {
            UserFunctions[it->first] = it->second.GetString();
        }
    }

    // Reads the "ann_streams" section and writes every stream into AnnFactors under the
    // destination "ann" or "factorann", chosen by the stream's "index_type".
    // A stream with "stream_id" is a user stream and must also give "value_type" ("ui8" or
    // "float"); a stream without "stream_id" is resolved by name through
    // NIndexAnn::TDataTypeConcern and must not give "value_type".
    // Throws yexception on inconsistent, unknown or duplicate streams and on an unsupported
    // index_type or value_type.
    void TConfig::TImpl::LoadStreams(const NJson::TJsonValue& config) {
        TFactorsHashListWriter<TAnnFactorsHashList, TAnnFactorsList> writer(AnnFactors, "");
        TSet<ui32> seenStreamIds;
        for (const auto& [streamName, streamCfg] : config["ann_streams"].GetMap()) {
            const bool userStream = streamCfg.Has("stream_id");
            const bool hasUserType = streamCfg.Has("value_type");
            if (userStream && !hasUserType)
                ythrow yexception() << "user stream '" << streamName << "' without value type";

            if (!userStream && hasUserType)
                ythrow yexception() << "definition of value type for common stream '" << streamName << "' is not allowed";

            const ui32 streamId = userStream ?
                streamCfg["stream_id"].GetUInteger() :
                NIndexAnn::TDataTypeConcern::Instance().GetID(streamName);
            //Note(yrum): ftAnn is not really a factor - it is a stream that may contain multiple values for one docid
            TFactor stream(TSimpleFactorDescription(streamName,
                            streamId, 0, TBitsReader::ftFloat), ftAnn);
            if (stream.IndexGlobal == NIndexAnn::DT_NONE)
                ythrow yexception() << "unknown stream " << streamName;
            const bool firstUseOfId = seenStreamIds.insert(stream.IndexGlobal).second;
            if (!firstUseOfId)
                ythrow yexception() << "duplicate stream " << streamName;

            // Translate the configured index type into the destination key used by the writer.
            const TString indexType = streamCfg["index_type"].GetString();
            TString dest;
            if (indexType == "SI_ANN") {
                dest = "ann";
            } else if (indexType == "SI_FACTORANN") {
                dest = "factorann";
            } else {
                ythrow yexception() << "incorrect stream index_type for " << streamName << ": " << indexType;
            }

            // Width is the size of one stored value, in bits.
            if (!hasUserType) {
                stream.Width = NIndexAnn::TDataTypeConcern::Instance().AvialiableTypes.at(stream.IndexGlobal) * 8;
            } else {
                const auto userType = streamCfg["value_type"].GetString();
                if (userType == "ui8") {
                    stream.Width = sizeof(ui8) * 8;
                } else if (userType == "float") {
                    stream.Width = sizeof(float) * 8;
                } else {
                    ythrow yexception() << "unsupported stream value type '" << userType << "'";
                }
            }
            writer.Write(dest, stream);
            // StreamsByName[stream.Name] = stream; // no need in this, uncomment when needed
        }
        writer.Finish();
    }

    // Reads the "geo_layers" section into GeoLayers and registers every layer in StreamsByName.
    // Each layer must have a "stream_id" that is unique and less than Max<ui8>().
    // When no layer is configured, a single layer "coords" with id 0 is added.
    void TConfig::TImpl::LoadGeo(const NJson::TJsonValue& config) {
        //Note(yrum): we do not use TFactorsHashListWriter and a hashmap here, because there is only one "Function" (see "dest" variable in LoadStreams()).
        //In other words, there has been no need to split the geo data into "ann" and "factorann" parts.
        TSet<ui32> seenLayerIds;
        for (const auto& [name, details] : config["geo_layers"].GetMap()) {
            Y_ENSURE(details.Has("stream_id"), "GeoLayer should have stream_id specified");
            const ui32 layerId = details["stream_id"].GetUInteger();
            //Note(yrum): ftGeoLayer is not really a factor - it is a stream that may contain multiple coordinates for one docid
            TFactor layer(TSimpleFactorDescription(name, layerId, 0, TBitsReader::ftFloat), ftGeoLayer);
            const bool firstUseOfId = seenLayerIds.insert(layer.IndexGlobal).second;
            if (!firstUseOfId) {
                ythrow yexception() << "duplicate stream " << name;
            }
            if (layerId >= Max<ui8>()) {
                ythrow yexception() << "incorrect stream id " << layerId << " for layer " << name;
            }

            GeoLayers.push_back(layer);
        }

        if (GeoLayers.empty()) {
            // Nothing configured: fall back to the default layer.
            TFactor defaultLayer(TSimpleFactorDescription("coords", /*layerId=*/0, 0, TBitsReader::ftFloat), ftGeoLayer);
            GeoLayers.push_back(defaultLayer);
        }

        for (const TFactor& layer : GeoLayers) {
            StreamsByName[layer.Name] = layer;
        }
    }

    // Loads factors from every top-level config section whose key matches `sectionName`
    // (compiled as a regular expression) and passes them to `factors`.
    //
    // factors     - writer that is cleared first, receives each factor together with the key
    //               of its section, and is finished at the end.
    // config      - root JSON value; must be a map (verified with VERIFY_WITH_LOG).
    // sectionName - regular expression for section keys; must compile (verified with VERIFY_WITH_LOG).
    // factorType  - type assigned to every TFactor created here.
    //
    // Throws yexception when a matched section is not a map, when a global index repeats
    // inside a section, or when it is already present in AllIndices.
    void TConfig::TImpl::LoadFactors(
        IFactorsInfoWriter& factors,
        const NJson::TJsonValue& config,
        const char* sectionName,
        TFactorType factorType
    ) {
        TRegExMatch sectionPattern(sectionName);
        VERIFY_WITH_LOG(sectionPattern.IsCompiled(), "Incorrect regular expr");
        factors.Clear();
        NJson::TJsonValue::TMapType sections;
        VERIFY_WITH_LOG(config.GetMap(&sections), "Incorrect factors config format");

        // Base indices of the fictive factors, addressed by EFictiveFactors.
        TVector<TIndexWebBase> fictiveBases(EFictiveFactors::COUNT);
        fictiveBases[EFictiveFactors::MATRIX_NET] = Domain->GetBaseIdx(FI_MATRIXNET);
        fictiveBases[EFictiveFactors::META_MATRIX_NET] = Domain->GetBaseIdx(FI_META_MATRIX_NET);
        fictiveBases[EFictiveFactors::FULL_MATRIX_NET] = Domain->GetBaseIdx(FI_FULL_MATRIX_NET);
        fictiveBases[EFictiveFactors::FAST_MATRIX_NET] = Domain->GetBaseIdx(FI_FAST_MATRIX_NET);
        fictiveBases[EFictiveFactors::FILTER_MATRIX_NET] = Domain->GetBaseIdx(FI_FILTER_MATRIX_NET);
        fictiveBases[EFictiveFactors::FAST_FILTER_MATRIX_NET] = Domain->GetBaseIdx(FI_FAST_FILTER_MATRIX_NET);
        fictiveBases[EFictiveFactors::META_POLYNOM] = Domain->GetBaseIdx(FI_META_POLYNOM);
        fictiveBases[EFictiveFactors::FULL_POLYNOM] = Domain->GetBaseIdx(FI_FULL_POLYNOM);
        fictiveBases[EFictiveFactors::FAST_POLYNOM] = Domain->GetBaseIdx(FI_FAST_POLYNOM);
        fictiveBases[EFictiveFactors::FILTER_POLYNOM] = Domain->GetBaseIdx(FI_FILTER_POLYNOM);
        fictiveBases[EFictiveFactors::FAST_FILTER_POLYNOM] = Domain->GetBaseIdx(FI_FAST_FILTER_POLYNOM);

        for (const auto& [sectionKey, sectionValue] : sections) {
            if (!sectionPattern.Match(sectionKey.data())) {
                continue;
            }

            const NJson::TJsonValue::TMapType* rawFactors = nullptr;
            if (!sectionValue.GetMapPointer(&rawFactors)) {
                ythrow yexception() << "invalid config, " << sectionName << " must be map: names to global index";
            }

            // Only the section literally named "dynamic_factors" resolves base indices by name.
            const bool dynamic = (sectionKey == "dynamic_factors");

            // Keyed by global index, so the second pass below visits factors in index order.
            TMap<ui32, TFactor> orderedFactors;
            for (auto it = rawFactors->begin(), last = rawFactors->end(); it != last; ++it) {
                TSimpleFactorDescription sfd = ReadFactor(it);
                TIndexWebBase baseIdx = TIndexWebBase::Invalid();
                if (dynamic) {
                    baseIdx = Domain->GetBaseIdx(sfd.Name, {}); // throws
                    //TODO(yrum): provide the second parameter via config
                }

                // Remember the global index of a factor whose base index is one of the fictive ones.
                const auto fictiveIt = std::find(fictiveBases.begin(), fictiveBases.end(), baseIdx);
                if (fictiveIt != fictiveBases.end()) {
                    const auto fictiveSlot = fictiveIt - fictiveBases.begin();
                    Parent.GetFictiveFactorsGlobalIndex()[fictiveSlot] = sfd.IndexGlobal;
                }

                const bool uniqueInSection = orderedFactors.emplace(sfd.IndexGlobal, TFactor(sfd, baseIdx, factorType)).second;
                if (!uniqueInSection) {
                    ythrow yexception() << "index of a factor is not unique: '" << sfd.Name << "'";
                }
                const bool uniqueOverall = AllIndices.insert(sfd.IndexGlobal).second;
                if (!uniqueOverall) {
                    ythrow yexception() << "index of a factor is not unique (has in another section): '" << sfd.Name << "'";
                }
            }

            // Position counts factors within this section, in ascending global-index order.
            ui32 position = 0;
            for (const auto& [globalIdx, factor] : orderedFactors) {
                factors.Write(sectionKey, factor);
                NameToPosition[factor.Name] = position;
                NameToPositionGlobal[factor.Name] = globalIdx;
                FactorsByIndexInFormula[factor.IndexGlobal] = factor;
                FactorsByName[factor.Name] = factor;
                // NOTE(review): presumably accumulates the factor's traits into DynFactorsConfigTraits - confirm against TBaseDomain.
                Domain->GetTrait(DynFactorsConfigTraits, factor.IndexBase);
                ++position;
            }
        }
        factors.Finish();
    }

    // Creates the implementation object (bound to this TConfig, the file name and the config version)
    // and immediately runs its Init() with the given models string and preprocessor.
    TConfig::TConfig(const char* filename, TStringBuf factorsModels, TConfigPreprocessor* configPreprocessor, i32 configVersion)
        : Impl(MakeHolder<TImpl>(*this, filename, configVersion))
    {
        Impl->Init(factorsModels, configPreprocessor);
    }

    // Defined out of line, in the translation unit where TImpl is a complete type.
    TConfig::~TConfig()
    {}

    // Forwards to TImpl::StaticFactors().
    const TFactorsList& TConfig::StaticFactors() const {
        return Impl->StaticFactors();
    }

    // Forwards to TImpl::IgnoredFactors().
    const NRTYFactors::TFactorsList& TConfig::IgnoredFactors() const {
        return Impl->IgnoredFactors();
    }

    // Forwards to TImpl::GetZoneFactors().
    const TFactorsList& TConfig::GetZoneFactors() const {
        return Impl->GetZoneFactors();
    }

    // Forwards to TImpl::RtyDynamicFactors().
    const TFactorsList& TConfig::RtyDynamicFactors() const {
        return Impl->RtyDynamicFactors();
    }

    // Forwards to TImpl::GetSuggestFactors().
    const TFactorsList& TConfig::GetSuggestFactors() const {
        return Impl->GetSuggestFactors();
    }

    // Forwards to TImpl::GetSuggestDictFactors().
    const TFactorsList& TConfig::GetSuggestDictFactors() const {
        return Impl->GetSuggestDictFactors();
    }

    // Forwards to the parameterless TImpl::GetQuerySpecFactors(), which yields the whole hash list.
    const TQSFactorsHashList& TConfig::GetQuerySpecFactors() const {
        return Impl->GetQuerySpecFactors();
    }

    // Forwards to TImpl::IsZoneUsed(name).
    bool TConfig::IsZoneUsed(const char* name) const {
        return Impl->IsZoneUsed(name);
    }

    // Forwards to TImpl::GetQuerySpecFactors(fName); the result is looked up by the given name.
    const TQSFactorsList::TPtr TConfig::GetQuerySpecFactors(const TString& fName) const {
        return Impl->GetQuerySpecFactors(fName);
    }

    // Forwards to the parameterless TImpl::GetCommonStatFactors(), which yields the whole hash list.
    const TCSFactorsHashList& TConfig::GetCommonStatFactors() const {
        return Impl->GetCommonStatFactors();
    }

    // Forwards to the parameterless TImpl::GetAnnStreams(), which yields the whole hash list.
    const TAnnFactorsHashList& TConfig::GetAnnStreams() const {
        return Impl->GetAnnStreams();
    }

    // Forwards to TImpl::GetAnnStreams(fName); the result is looked up by the given name.
    const TAnnFactorsList::TPtr TConfig::GetAnnStreams(const TString& fName) const {
        return Impl->GetAnnStreams(fName);
    }

    // Forwards to TImpl::GetCommonStatFactors(fName); the result is looked up by the given name.
    const TCSFactorsList::TPtr TConfig::GetCommonStatFactors(const TString& fName) const {
        return Impl->GetCommonStatFactors(fName);
    }

    // Forwards to TImpl::GetGeoLayers().
    const TGeoLayersList& TConfig::GetGeoLayers() const {
        return Impl->GetGeoLayers();
    }

    // Forwards to TImpl::GetUserFunctions().
    const TUserFunctionsList& TConfig::GetUserFunctions() const {
        return Impl->GetUserFunctions();
    }

    // Forwards to TImpl::DynamicFactors().
    const TFactorsList& TConfig::DynamicFactors() const {
        return Impl->DynamicFactors();
    }

    // Forwards to TImpl::UserFactors().
    const TFactorsList& TConfig::UserFactors() const {
        return Impl->UserFactors();
    }

    // Forwards to TImpl::GetFactorsByRtyType(userInput) and returns its pointer result unchanged.
    // NOTE(review): presumably nullptr signals an unknown type name - confirm in TImpl.
    const TFactorsList* TConfig::GetFactorsByRtyType(const TString& userInput) const {
        return Impl->GetFactorsByRtyType(userInput);
    }

    // C-string overload: wraps `name` into a TStringBuf and delegates to the TStringBuf overload.
    // When `length` equals TString::npos, the length is measured with TString::StrLen(name).
    size_t TConfig::GetFactorGlobalNum(const char* name, size_t length) const {
        if (length == TString::npos) {
            length = TString::StrLen(name);
        }
        return GetFactorGlobalNum(TStringBuf(name, length));
    }

    // Forwards to TImpl::GetFactorGlobalNum(name).
    size_t TConfig::GetFactorGlobalNum(TStringBuf name) const {
        return Impl->GetFactorGlobalNum(name);
    }

    // Forwards to TImpl::GetFactorNum(name).
    size_t TConfig::GetFactorNum(const TString& name) const {
        return Impl->GetFactorNum(name);
    }

    // Forwards to TImpl::GetKey().
    TString TConfig::GetKey() const {
        return Impl->GetKey();
    }

    // Forwards to TImpl::GetModelPath(value) and returns the resulting path by value.
    const TFsPath TConfig::GetModelPath(const TString& value) const {
        return Impl->GetModelPath(value);
    }

    // Rebuilds RankModels from the "formulas" section of the config.
    //
    // Runs under a write guard on MutexRankModelGen. Previously loaded models and
    // DefaultRankModel are dropped first; a missing "formulas" section leaves both empty.
    // The model named "default" also becomes DefaultRankModel.
    // Throws yexception when "formulas" is present but is not a map.
    void TConfig::TImpl::LoadRankModels(const NJson::TJsonValue& config) {
        TWriteGuard guard(MutexRankModelGen);
        DefaultRankModel = nullptr;
        RankModels.clear();
        if (!config.Has("formulas")) {
            return;
        }

        const NJson::TJsonValue::TMapType* formulas = nullptr;
        if (!config["formulas"].GetMapPointer(&formulas)) {
            ythrow yexception() << "invalid config, formulas must be map: names to rankmodel";
        }

        for (const auto& [modelName, modelJson] : *formulas) {
            // The pointer is handed over to RankModels right below.
            TRankModelHolder* holder = new TRankModelHolder(modelName, modelJson, &Parent);
            if (modelName == "default") {
                DefaultRankModel = holder;
            }
            RankModels[modelName] = holder;
        }
    }

    // Rebuilds FactorSets from the "factor_sets" section of the config.
    // Existing sets are dropped first; a missing section leaves the collection empty.
    // Fails (Y_ENSURE) when the section is present but is not a map.
    void TConfig::TImpl::LoadFactorSets(const NJson::TJsonValue& config) {
        FactorSets.clear();
        if (!config.Has("factor_sets")) {
            return;
        }

        const NJson::TJsonValue::TMapType* sets = nullptr;
        Y_ENSURE(config["factor_sets"].GetMapPointer(&sets), "invalid config, factor_sets section must be a map");

        for (const auto& [setName, setJson] : *sets) {
            // The set is constructed before the slot is touched (right operand is sequenced first).
            FactorSets[setName] = MakeHolder<TFactorSet>(setName, setJson, &Parent);
        }
    }

    // Builds Domain from the always-enabled slices (WEB_PRODUCTION, BEGEMOT_QUERY_FACTORS)
    // plus every slice listed in the optional "enabled_factor_slices" array of the config.
    //
    // configPtr may be null, in which case only the two built-in slices are used.
    // Throws yexception for a slice name that FromString() does not recognise.
    // After creation, FI_TLEN and FI_DOC_LEN of WEB_PRODUCTION get the NTraits::DocsLens trait.
    void TConfig::TImpl::CreateFactorsDomain(const NJson::TJsonValue* configPtr) {
        TSet<NFactorSlices::EFactorSlice> enabledSlices = {
            EFactorSlice::WEB_PRODUCTION,
            EFactorSlice::BEGEMOT_QUERY_FACTORS,
        };

        if (configPtr != nullptr) {
            for (const auto& sliceJson : (*configPtr)["enabled_factor_slices"].GetArray()) {
                const TString sliceName = sliceJson.GetStringRobust();
                NFactorSlices::EFactorSlice slice;
                if (!FromString(sliceName, slice)) {
                    ythrow yexception() << "unknown factor slice: " << sliceName;
                }
                enabledSlices.insert(slice);
            }
        }

        Domain = MakeHolder<TBaseDomain>(enabledSlices, NFactorSlices::EFactorUniverse::DEFAULT);
        Domain->SetTrait(EFactorSlice::WEB_PRODUCTION, FI_TLEN, NTraits::DocsLens);
        Domain->SetTrait(EFactorSlice::WEB_PRODUCTION, FI_DOC_LEN, NTraits::DocsLens);
    }

    // Resolves a rank model from a model name and/or a custom polynom.
    //
    //  - a non-empty polynom wins: it is resolved via GetCustomRankModel(), using
    //    baseRankModel as the base model if given, otherwise `name`;
    //  - otherwise an empty `name` yields `def`;
    //  - otherwise `name` itself is resolved via GetCustomRankModel() with baseRankModel.
    const TRankModelHolder* TConfig::TImpl::GetRankModel(const TStringBuf name, const TRankModelHolder* def, const TStringBuf baseRankModel, const TStringBuf polynom) const {
        // custom polynom has higher priority
        if (!polynom.empty()) {
            const TStringBuf base = baseRankModel.empty() ? name : baseRankModel;
            return GetCustomRankModel(polynom, def, base);
        }
        if (name.empty()) {
            return def;
        }
        return GetCustomRankModel(name, def, baseRankModel);
    }

    // Returns the rank model stored in RankModels under the key `formula + model`,
    // creating and storing it from the encoded `formula` on a miss.
    //
    // The lookup is first done under a read guard; on a miss the write guard is taken and
    // the lookup is repeated before anything is built (another caller may have inserted the
    // entry in between). If decoding the formula or checking its factors throws, the
    // problem is logged at debug level and `def` is returned without storing anything.
    // A formula that references the matrixnet global index gets a base model:
    // the entry stored under `model` if there is one, otherwise `def`, otherwise DefaultRankModel.
    const TRankModelHolder* TConfig::TImpl::GetCustomRankModel(const TStringBuf formula, const TRankModelHolder* def, const TStringBuf model) const {
        // concatenation is used to distinguish equal polynoms with different base models
        const TString cacheKey = TString::Join(formula, model);

        {
            TReadGuard readLock(MutexRankModelGen);
            if (const auto cached = RankModels.find(cacheKey); cached != RankModels.end()) {
                return cached->second.Get();
            }
        }

        TWriteGuard writeLock(MutexRankModelGen);
        if (const auto cached = RankModels.find(cacheKey); cached != RankModels.end()) {
            return cached->second.Get();
        }

        SRelevanceFormula decoded;
        bool dependsOnMatrixNet = false;
        try {
            Decode(&decoded, formula, GetFactorsCountByIndex());
            TSet<ui32> usedIndices;
            decoded.UsedFactors(usedIndices);
            for (const ui32 factorIdx : usedIndices) {
                if (Parent.GetMatrixNetGlobalIndex() == factorIdx) { // Consider using fullMatrixNet and FastMatrixNet factors
                    dependsOnMatrixNet = true;
                } else {
                    // Result is unused; the call is kept for its effect (an exception lands in the catch below).
                    GetFactorByFormulaIndex(factorIdx);
                }
            }
        } catch (...) {
            DEBUG_LOG << "Incorrect formula data: " << formula << Endl;
            return def;
        }

        const TRankModelHolder* baseModel = nullptr;
        if (dependsOnMatrixNet) {
            baseModel = def ? def : DefaultRankModel;
            if (model) {
                if (const auto named = RankModels.find(model); named != RankModels.end()) {
                    baseModel = named->second.Get();
                }
            }
        }

        TRankModelHolder* created = new TRankModelHolder(formula, decoded, &Parent, baseModel);
        RankModels[cacheKey] = created;
        return created;
    }

    // Forwards to TImpl::GetRankModels(); the collection is returned by value.
    TConfig::TRankModels TConfig::GetRankModels() const {
        return Impl->GetRankModels();
    }

    // Forwards to the three-argument TImpl::GetRankModel(name, baseRankModel, polynom).
    const TRankModelHolder* TConfig::GetRankModel(const TStringBuf name, const TStringBuf baseRankModel, const TStringBuf polynom) const {
        return Impl->GetRankModel(name, baseRankModel, polynom);
    }

    // Resolves a rank model, using the entry stored under `modelTypeName` as the fallback.
    //
    // The fallback is looked up in RankModels under a read guard (nullptr when absent);
    // the guard is released before delegating to GetRankModel() with an empty base model name.
    const TRankModelHolder* TConfig::TImpl::GetTypedRankModel(const TStringBuf name, const TStringBuf modelTypeName, const TStringBuf polynom) const {
        TRankModelHolder* typedDefault = nullptr;
        {
            TReadGuard readLock(MutexRankModelGen);
            if (const auto found = RankModels.find(modelTypeName); found != RankModels.end()) {
                typedDefault = found->second.Get();
            }
        }
        return GetRankModel(name, typedDefault, TStringBuf(), polynom);
    }

    // Forwards to TImpl::GetFastRankModel(name, polynom).
    const TRankModelHolder* TConfig::GetFastRankModel(const TStringBuf name, const TStringBuf polynom) const {
        return Impl->GetFastRankModel(name, polynom);
    }

    // Forwards to TImpl::GetFilterModel(name, polynom).
    const TRankModelHolder* TConfig::GetFilterModel(const TStringBuf name, const TStringBuf polynom) const {
        return Impl->GetFilterModel(name, polynom);
    }

    // Forwards to TImpl::GetFastFilterModel(name, polynom).
    const TRankModelHolder* TConfig::GetFastFilterModel(const TStringBuf name, const TStringBuf polynom) const {
        return Impl->GetFastFilterModel(name, polynom);
    }

    // Forwards to TImpl::GetL3RankModel(name); unlike the sibling getters, no polynom is accepted.
    const TRankModelHolder* TConfig::GetL3RankModel(const TStringBuf name) const {
        return Impl->GetL3RankModel(name);
    }

    // Forwards to TImpl::HasRankModel(name).
    bool TConfig::HasRankModel(const TString& name) const {
        return Impl->HasRankModel(name);
    }

    // Forwards to TImpl::GetFactorModels().
    // Note: hands out a non-const reference from a const method.
    TConfig::TFactorModels& TConfig::GetFactorModels() const {
        return Impl->GetFactorModels();
    }

    // Forwards to TImpl::GetFactorCount().
    size_t TConfig::GetFactorCount() const {
        return Impl->GetFactorCount();
    }

    void TConfig::CopyFactors(const TFactorChunks& chunks, const TFactorStorage& src, TFactorStorage& dst) {
        for (TFactorChunks::const_iterator i = chunks.begin(); i != chunks.end(); ++i) {
            memcpy(dst.factors + i->IndexDst, src.factors + i->IndexSrc, i->Length * sizeof(float));
        }
    }

    // Converts a source-index -> destination-index mapping into a list of contiguous chunks.
    //
    // chunks     - output; cleared first, then filled in ascending source-index order.
    // srcToDest  - mapping to compress.
    //
    // Consecutive entries whose source and destination indices both grow by exactly one are
    // merged: the current chunk's Length is incremented. Any other entry opens a new chunk
    // via TFactorChunk(src, dst).
    //
    // The "is there a chunk to extend" condition is checked explicitly. The previous
    // sentinel-based form (prev = Max<ui32>() - 1) called chunks.back() on an empty
    // container when the first entry was {Max<ui32>(), Max<ui32>()}.
    void TConfig::BuildChunks(TFactorChunks& chunks, const TMap<ui32, ui32>& srcToDest) {
        chunks.clear();
        // Only read once `chunks` is non-empty, so the initial values are irrelevant.
        ui32 prevSrc = 0;
        ui32 prevDst = 0;
        for (const auto& [src, dst] : srcToDest) {
            const bool extendsLastChunk = !chunks.empty() && src == prevSrc + 1 && dst == prevDst + 1;
            if (extendsLastChunk) {
                ++chunks.back().Length;
            } else {
                chunks.push_back(TFactorChunk(src, dst));
            }
            prevSrc = src;
            prevDst = dst;
        }
    }

    // TODO: this method is pure virtual in class IFactorsInfo (per the original note),
    // so a definition has to be provided here.
    // This implementation does not expose a names array: it always returns nullptr.
    const char* const* TConfig::GetFactorNames() const {
        return nullptr;
    }

    // Adds to `factors` every index in [0, GetFactorCount()) for which GetFactorName()
    // yields a non-null, non-empty name. Existing contents of `factors` are kept.
    void TConfig::GetAllFactorsIndexes(TSet<ui32>& factors) const {
        for (size_t idx = 0; idx < GetFactorCount(); ++idx) {
            const char* const factorName = GetFactorName(idx);
            const bool unnamed = !factorName || !*factorName;
            if (unnamed) {
                continue;
            }
            factors.insert(idx);
        }
    }

    // Forwards to TImpl::GetRTYFeatures().
    const TVector<NRTYFeatures::IFeature::TPtr>& TConfig::GetRTYFeatures() const {
        return Impl->GetRTYFeatures();
    }

    // Forwards to TImpl::GetUnusedFactorsReport(); the JSON report is returned by value.
    // Note: this is a non-const method, unlike most other accessors here.
    NJson::TJsonValue TConfig::GetUnusedFactorsReport() {
        return Impl->GetUnusedFactorsReport();
    }

    // Forwards to TImpl::GetFactorsCountByIndex(withMn).
    ui32 TConfig::GetFactorsCountByIndex(bool withMn) const {
        return Impl->GetFactorsCountByIndex(withMn);
    }

    // Forwards to TImpl::NeedDocsLens().
    bool TConfig::NeedDocsLens() const {
        return Impl->NeedDocsLens();
    }

    bool TConfig::IsStaticOnly(const TUsedFactors& factors) const {
        for (auto&& f : factors) {
            if (IsFictiveFactor(f)) {
                continue;
            }

            const TFactor& factor = *GetFactorByFormulaIndex(f);
            switch (factor.FactorType) {
            case ftStatic:
                break;
            default:
                return false;
            }
        }
        return true;
    }

    // Forwards to TImpl::GetFactorsDomain().
    const NFactorSlices::TFactorDomain& TConfig::GetFactorsDomain() const {
        return Impl->GetFactorsDomain();
    }

    // Forwards to TImpl::GetVersion().
    i32 TConfig::GetVersion() const {
        return Impl->GetVersion();
    }

    // Forwards to TImpl::IsInitialized().
    bool TConfig::IsInitialized() const {
        return Impl->IsInitialized();
    }

    // Forwards to TImpl::GetBaseDomain() and returns its pointer result unchanged.
    const TBaseDomain* TConfig::GetBaseDomain() const {
        return Impl->GetBaseDomain();
    }

    // Forwards to TImpl::GetJsonConfig().
    const NJson::TJsonValue& TConfig::GetJsonConfig() const {
        return Impl->GetJsonConfig();
    }
}
