#include "index_component.h"
#include "index_builder.h"
#include "index_manager.h"
#include "index_config.h"

#include "docurl_normalizer.h"
#include "frq_normalizer.h"
#include "yndex_normalizer.h"
#include "pruning_normalizer.h"
#include "sent_normalizer.h"
#include "group_attrs_normalizer.h"
#include "tf_normalizer.h"

#include <search/config/virthost.h>
#include <saas/rtyserver/factors/factors_config.h>
#include <saas/rtyserver/components/generator/kpsinfo_normalizer.h>
#include <saas/rtyserver/components/search/normalizers/multipart_normalizer.h>
#include <saas/rtyserver/components/search/normalizers/invhash_normalizer.h>
#include <saas/rtyserver/components/search/normalizers/search_archive_normalizer.h>
#include <saas/rtyserver/config/common_indexers_config.h>
#include <saas/rtyserver/config/grouping_config.h>
#include <saas/rtyserver/config/indexer_config.h>
#include <saas/rtyserver/config/searcher_config.h>

#include <library/cpp/digest/md5/md5.h>

#include <util/generic/ptr.h>

namespace {
    // Index files that the TRTYIndexComponent constructor registers with the
    // TIndexFile::ppPrefetch policy. The meaning of the two numeric fields is
    // defined by TIndexFileEntry, which is declared outside this file.
    const TIndexFileEntry PrefetchedIndexFiles[] = {
        {"index.docurl", 0.0f, 1.0f},
        {"indexfrq", 0.0f, 1.0f},
        {"indexsent", 0.0f, 1.0f},
    };

    // Index files that the TRTYIndexComponent constructor registers with the
    // TIndexFile::ppDisable policy.
    const TString NonPrefetchedIndexFile[] = {
        "indexarc",
        "indexdir",
        "indexlen",
    };
}

// Builds the index component: registers the index files listed in the
// file-local tables and then registers the normalizers in a fixed order.
TRTYIndexComponent::TRTYIndexComponent(const TRTYServerConfig& config)
    : TBaseIndexComponent(config, config.IndexGenerator == INDEX_COMPONENT_NAME)
    , Normalizers(config)
    , IndexConfig(*config.ComponentsConfig.Get<TRTYIndexComponentConfig>(INDEX_COMPONENT_NAME))
{
    const bool checkedByDefault = true;

    // Files registered with the prefetch policy.
    for (const auto& entry : PrefetchedIndexFiles) {
        RegisterIndexFile(GetIndexFile(entry, TIndexFile::ppPrefetch, checkedByDefault));
    }

    // Files registered with prefetch disabled.
    for (const auto& fileName : NonPrefetchedIndexFile) {
        RegisterIndexFile(TIndexFile(fileName, checkedByDefault, TIndexFile::ppDisable));
    }

    // NB: if any normalizer rebuilds the index format depending on the configuration
    // (changes the archive format, etc.), the corresponding parameter must be dumped
    // in GetInfoChecker - otherwise rtyserver will "forget" to invoke that normalizer
    // when the parameters change.

    const bool multipartArchive = Config.GetSearcherConfig().ArchiveType == AT_MULTIPART;

    Normalizers.Register(new TSearchArchiveNormalizer(Config));
    if (multipartArchive) {
        Normalizers.Register(new TMultipartNormalizer(Config, "indexarc", Config.GetCommonIndexers().TextArchiveParams));
    }
    Normalizers.Register(new TIndexFrqNormalizer(Config));
    Normalizers.Register(new TYndexNormalizer(Config));
    Normalizers.Register(new TPruningNormalizer(Config));
    Normalizers.Register(new TSentenceLengthsNormalizer(Config, IndexConfig.GetWriteSentLens()));
    Normalizers.Register(new TGroupAttrsNormalizer(Config));
    Normalizers.Register(new TKpsInfoNormalizer(Config));
    Normalizers.Register(new TInvHashNormalizer(Config));
    Normalizers.Register(new TIndexTfNormalizer(Config));
    Normalizers.Register(new TDocUrlNormalizer(Config));
}

// Returns a newly allocated TIndexComponentParser wrapped in the parser pointer type.
NRTYServer::IComponentParser::TPtr TRTYIndexComponent::BuildParser() const {
    NRTYServer::IComponentParser::TPtr parser = new TIndexComponentParser{};
    return parser;
}

// Returns a newly allocated TIndexParsedEntity constructed from `params`.
NRTYServer::IParsedEntity::TPtr TRTYIndexComponent::BuildParsedEntity(NRTYServer::IParsedEntity::TConstructParams& params) const {
    NRTYServer::IParsedEntity::TPtr entity = new TIndexParsedEntity(params);
    return entity;
}

// Creates the TRTYIndexManager for this component from the given construction context.
THolder<NRTYServer::IIndexComponentManager> TRTYIndexComponent::CreateManager(const NRTYServer::TManagerConstructionContext& context) const {
    THolder<NRTYServer::IIndexComponentManager> manager = MakeHolder<TRTYIndexManager>(context, INDEX_COMPONENT_NAME);
    return manager;
}

// Creates a TRTYIndexBuilder for the given context.
// Returns a null holder when the context's config type is "memory".
THolder<NRTYServer::IIndexComponentBuilder> TRTYIndexComponent::CreateBuilder(const NRTYServer::TBuilderConstructionContext& context) const {
    const bool isMemoryIndex = context.Config.GetType() == "memory";
    if (!isMemoryIndex) {
        return MakeHolder<TRTYIndexBuilder>(context.Config, context.DirConfig, context.BaseConfig, GetName());
    }

    return nullptr;
}

// Merges the index via TRTYMerger.
// The set of merger operations always includes key/inv, attrs, length and docurl;
// the archive operation is chosen from the searcher's configured archive type.
// Returns false if TRTYMerger::MergeIndicies reports failure, true otherwise.
bool TRTYIndexComponent::DoMerge(const NRTYServer::TMergeContext& context) const {
    INFO_LOG << "Docs count before merge = " << context.Context.Decoder->GetSize() << " / " << context.Context.Decoder->GetNewDocsCount() << Endl;

    // Work on a copy of the caller's merge context with component-specific settings.
    TRTYMerger::TContext mergerContext(context.Context);
    mergerContext.IgnoreNoAttrs = true;
    mergerContext.WriteSentLens = IndexConfig.GetWriteSentLens();

    ui64 mergerOps = TRTYMerger::otKI | TRTYMerger::otAttrs | TRTYMerger::otLength | TRTYMerger::otDocUrl;

    const auto archiveType = Config.GetSearcherConfig().ArchiveType;
    if (archiveType == AT_MULTIPART) {
        // Multipart archive: needs its own merger context and the archive file name.
        mergerContext.MultipartMergerContext.Reset(new NRTYArchive::TMultipartMergerContext(Config.GetCommonIndexers().TextArchiveParams));
        mergerContext.MultipartArcFile = "indexarc";
        mergerOps |= TRTYMerger::otMpArch;
    } else if (archiveType == AT_FLAT) {
        mergerOps |= TRTYMerger::otArch;
    } else {
        FAIL_LOG("unknown archive type");
    }

    TRTYMerger rtyMerger(context.RigidStopSignal, mergerOps);
    if (!rtyMerger.MergeIndicies(mergerContext)) {
        return false;
    }

    INFO_LOG << "Docs count after merge = " << context.Context.Decoder->GetSize() << " / " << context.Context.Decoder->GetNewDocsCount() << Endl;
    return true;
}

// Reports whether the registered normalizers consider the index valid.
// PREPARED indexes are not checked and are always reported as valid.
bool TRTYIndexComponent::DoAllRight(const NRTYServer::TNormalizerContext& context) const {
    if (context.IndexType != IIndexController::EIndexType::PREPARED) {
        // The normalizers receive the indexfrq file map owned by this component's manager.
        const THolder<TFileMap>& indexFrqMap = context.Managers.GetManager<TRTYIndexManager>(INDEX_COMPONENT_NAME)->GetFileIndexFrq();
        return Normalizers.AllRight(context, indexFrqMap);
    }
    return true;
}

// Runs CheckAndFix on the registered normalizers for the given index.
// PREPARED indexes are left untouched.
void TRTYIndexComponent::CheckAndFix(const NRTYServer::TNormalizerContext& context) const {
    if (context.IndexType != IIndexController::EIndexType::PREPARED) {
        // The normalizers receive the indexfrq file map owned by this component's manager.
        const THolder<TFileMap>& indexFrqMap = context.Managers.GetManager<TRTYIndexManager>(INDEX_COMPONENT_NAME)->GetFileIndexFrq();
        Normalizers.CheckAndFix(context, indexFrqMap);
    }
}

// Fills `info` with this component's version and an MD5 hash of the
// configuration pieces fed in below: pruning, grouping, archive type,
// normalizer names, url-hash flag and the component's own config.
// Returns false if the base class implementation fails.
bool TRTYIndexComponent::GetInfoChecker(NRTYServer::TInfoChecker& info) const {
    if (!TBaseIndexComponent::GetInfoChecker(info)) {
        return false;
    }

    info.Version = 2;

    // NOTE: the order and exact text of the updates below define the hash value.
    MD5 md5;
    md5.Update("Pruning:");
    md5.Update(Config.Pruning->ToString());
    md5.Update(", Groups:");
    md5.Update(Config.GetCommonIndexers().GroupingConfig->ToString());

    // The archive type is only added to the hash for the flat archive.
    if (Config.GetSearcherConfig().ArchiveType == AT_FLAT) {
        md5.Update(", ArchiveType:");
        md5.Update(ToString(Config.GetSearcherConfig().ArchiveType));
    }

    md5.Update(", Normalizers:");
    md5.Update(Normalizers.GetNormalizersNames());
    md5.Update(", IndexesBuiltByNormalizers:");
    if (Config.GetSearcherConfig().EnableUrlHash) {
        md5.Update("InvHash");
    }

    // Serialized component config goes last.
    TStringStream configDump;
    IndexConfig.ToString(configDump);
    md5.Update(", Config:");
    md5.Update(configDump.Str());

    char buf[33];
    info.AdditionalHash = md5.End(buf);
    return true;
}

bool TRTYIndexComponent::CheckConfig() const {
    if (!TBaseIndexComponent::CheckConfig())
        return false;
    if (!IndexConfig.GetWriteSentLens() && Config.GetSearcherConfig().Factors && !Config.GetSearcherConfig().Factors->DynamicFactors().empty()) {
        ERROR_LOG << "No dynamic factors without SentLens!";
        return false;
    }
    return true;
}
