#include "component.h"
#include "config.h"
#include <library/cpp/streams/special/buffered_throttled_file.h>
#include <saas/rtyserver/config/sys_env.h>
#include <saas/library/mapping/mapping.h>
#include <saas/library/hash_to_docid/hash_to_docid.h>

namespace {
    // Name of the on-disk index file; the methods below join it to an index directory path.
    const TString IndexFile = "minhash2docid";

    // The component is considered used when it is selected as the url-to-docid manager
    // or when it is listed in the configured components set.
    bool IsUsed(const TRTYServerConfig& config) {
        if (config.UrlToDocIdManager == MINHASH2DOCID_COMPONENT_NAME) {
            return true;
        }
        return config.ComponentsSet.contains(MINHASH2DOCID_COMPONENT_NAME);
    }
}

// Binds the component to the server config, looks up its own config section by component name
// and registers the index file this component owns.
// NOTE(review): the result of ComponentsConfig.Get<>() is dereferenced without a null check;
// presumably the section is always registered for this component name -- confirm.
TMinHashToDocIdComponent::TMinHashToDocIdComponent(const TRTYServerConfig& config)
    : IIndexComponent(IsUsed(config))
    , ComponentConfig(*(config.ComponentsConfig.Get<TMinHashToDocIdConfig>(MINHASH2DOCID_COMPONENT_NAME)))
    , Config(config)
{
    // The prefetch policy is ppDisable: the index file does not need to be locked,
    // because its data is loaded into RssAnon.
    // Ticket about using memory mapping: SAAS-4918
    const TIndexFile indexFile(IndexFile, true, TIndexFile::ppDisable);
    IndexFiles.insert(indexFile);
}

// Creates a TMinHashToDocIdBuilder for this component's index file name and component name.
// The construction context is not used (the parameter is unnamed).
THolder<NRTYServer::IIndexComponentBuilder> TMinHashToDocIdComponent::CreateBuilder(const NRTYServer::TBuilderConstructionContext&) const {
    return MakeHolder<TMinHashToDocIdBuilder>(IndexFile, MINHASH2DOCID_COMPONENT_NAME);
}

// Creates a manager over the index file located in context.Dir.
// A manager is only created for FINAL and PREPARED indexes; any other index type yields nullptr.
THolder<NRTYServer::IIndexComponentManager> TMinHashToDocIdComponent::CreateManager(const NRTYServer::TManagerConstructionContext& context) const {
    const bool supportedIndexType =
        context.IndexType == IIndexController::FINAL ||
        context.IndexType == IIndexController::PREPARED;
    if (!supportedIndexType) {
        return nullptr;
    }

    const TFsPath indexPath(context.Dir.PathName() + "/" + IndexFile);
    return MakeHolder<TMinHashToDocIdManager>(indexPath, MINHASH2DOCID_COMPONENT_NAME);
}

// This component supplies no parser: the returned pointer is always null.
NRTYServer::IComponentParser::TPtr TMinHashToDocIdComponent::BuildParser() const {
    return nullptr;
}

// This component supplies no parsed entity: the construction parameters are ignored
// (the parameter is unnamed) and the returned pointer is always null.
NRTYServer::IParsedEntity::TPtr TMinHashToDocIdComponent::BuildParsedEntity(NRTYServer::IParsedEntity::TConstructParams&) const {
    return nullptr;
}

// Quick health check: reports whether the index file is present in context.Dir.
// Only existence is tested here; the file contents are not opened or validated.
bool TMinHashToDocIdComponent::DoAllRight(const NRTYServer::TNormalizerContext& context) const {
    return NFs::Exists(context.Dir.PathName() + "/" + IndexFile);
}

// Verifies that the index file in context.Dir exists and can be loaded as a minhash table,
// and rebuilds it from the documents exposed by context.Managers when it cannot.
//
// In read-only mode (Config.IsReadOnly) nothing is rebuilt: a missing or unloadable file is
// reported through AssertCorrectIndex instead.
//
// The rebuilt file is written through a throttled stream. The limit comes from the component's
// MaxBytesPerSecond setting when it is non-zero, otherwise from the "BatchWrite" entry of the
// sys env Io settings.
void TMinHashToDocIdComponent::CheckAndFix(const NRTYServer::TNormalizerContext& context) const {
    using THashTable = NMinHash::TTable<ui32, NMinHash::TChdMinHashFunc>;

    const TString filePath = context.Dir.PathName() + "/" + IndexFile;
    const bool isReadOnly = Config.IsReadOnly;
    bool isCorrupted = false;

    if (!NFs::Exists(filePath)) {
        isCorrupted = true;
        DEBUG_LOG << filePath << " does not exist. Create it" << Endl;
        AssertCorrectIndex(!isReadOnly, "Index File " + filePath + " doesn't exist and we can't create it in read-only mode");
    } else {
        // The file is only opened when it exists: opening a missing file can only fail and
        // would add a misleading "Can't open" notice on top of the message above.
        try {
            // Loading doubles as validation (original note: "Check config params").
            // The loaded table itself is not needed, so it is released right away.
            const auto probe = MakeHolder<THashTable>(filePath);
        } catch (...) {
            NOTICE_LOG << "Can't open minhash data: " << CurrentExceptionMessage() << Endl;
            isCorrupted = true;
        }
    }

    if (isCorrupted) {
        NOTICE_LOG << filePath << " is corrupted!" << Endl;
    }
    AssertCorrectIndex(!isReadOnly || !isCorrupted, "MinHash2DocId is corrupted and cannot be repaired in read-only mode");
    if (!isCorrupted) {
        return;
    }

    // Rebuild: collect (hash, docid) pairs for every document known to the managers.
    const ui32 docCount = context.Managers.GetDocumentsCount();
    auto docIterator = context.Managers.GetDocSearchInfoIterator();
    Y_VERIFY(docIterator);

    TVector<NSaas::TDocHashWithId> items;
    items.reserve(docCount);
    TVector<TString> keys;
    keys.reserve(docCount);
    for (; docIterator->IsValid(); docIterator->Next()) {
        items.emplace_back(docIterator->GetHashWithId());
        keys.push_back(items.back().Hash.Quote());
    }

    NMinHash::TChdHashBuilder builder(keys.size(), ComponentConfig.GetLoadFactor(),
                                        ComponentConfig.GetKeysPerBucket(), ComponentConfig.GetSeed(),
                                        ComponentConfig.GetErrorBits());
    TAutoPtr<NMinHash::TChdMinHashFunc> hashFunc = builder.Build(keys);

    // Place every docid into the slot the hash function assigns to its key.
    TVector<ui32> values(keys.size());
    for (const auto& item : items) { // by reference: no per-document copy
        const TString key = item.Hash.Quote();
        values[hashFunc->Get(key.c_str(), key.size(), false)] = item.Id;
    }

    // The table takes the hash function over; hashFunc must not be used past this point.
    auto hashTable = MakeHolder<THashTable>(hashFunc);
    hashTable->Add(values.begin(), values.end());

    if (const auto maxBytesPerSecond = ComponentConfig.GetMaxBytesPerSecond()) {
        TBufferedThrottledFileOutputStream file(filePath, maxBytesPerSecond);
        hashTable->Save(&file);
    } else {
        // Fall back to the server-wide throttle. The lookup is checked so that a missing
        // entry fails loudly instead of dereferencing end().
        const auto& ioSettings = Config.GetSysEnv().Io;
        const auto batchWrite = ioSettings.find("BatchWrite");
        Y_VERIFY(batchWrite != ioSettings.end(), "No \"BatchWrite\" entry in sys env Io settings");
        TBufferedThrottledFileOutputStream file(filePath, batchWrite->second);
        hashTable->Save(&file);
    }
    INFO_LOG << "Rebuilt " << filePath << Endl;
}
