#include "docurl_normalizer.h"

#include <kernel/doc_url_index/doc_url_index.h>

#include <library/cpp/on_disk/2d_array/array2d_writer.h>

// Constructs the normalizer; the server config is simply forwarded to the
// NRTYServer::INormalizer base, no additional state is set up here.
TDocUrlNormalizer::TDocUrlNormalizer(const TRTYServerConfig& config)
    : NRTYServer::INormalizer(config) {}

// Returns the fixed identifier of this normalizer.
const char* TDocUrlNormalizer::Name() const {
    const char* const normalizerName = "DocUrl";
    return normalizerName;
}

// Reports whether the "index.docurl" file is present in the directory described
// by the normalizer context. Only existence is checked; the file content is not
// inspected here. The file-map argument is unused.
bool TDocUrlNormalizer::AllRight(const NRTYServer::TNormalizerContext& context, const THolder<TFileMap>&) const {
    const TFsPath indexDir(context.Dir.PathName());
    const TFsPath docUrlFile = indexDir / "index.docurl";
    const bool present = NFs::Exists(docUrlFile);
    return present;
}

// Rebuilds "index.docurl" in the context directory from the URLs reported by the
// doc-search-info iterator, then re-reads the freshly written file and verifies it.
//
// Steps:
//   1. Collect the URL of every non-deleted document into a vector indexed by doc id.
//      Slots that are never assigned (deleted or not visited documents) stay empty.
//   2. Write the vector with TFile2DArrayWriter: one row per doc id, one char per cell.
//   3. Open the result with TDocUrlIndexReader and assert that the row count and
//      every row match what was collected.
//
// The file-map argument is unused.
void TDocUrlNormalizer::Fix(const NRTYServer::TNormalizerContext& context, const THolder<TFileMap>&) const {
    const TFsPath path = TFsPath(context.Dir.PathName()) / "index.docurl";

    INFO_LOG << "Extracting urls from " << context.Dir.BaseName() << Endl;
    // Pre-sized by the total document count so that a doc id can be used as an index.
    TVector<TString> urls(context.Managers.GetDocumentsCount());
    for (auto iterator = context.Managers.GetDocSearchInfoIterator(); iterator && iterator->IsValid(); iterator->Next()) {
        if (iterator->IsDeleted()) {
            continue;
        }

        const TDocSearchInfo& docSearchInfo = iterator->Get();
        // A doc id outside of the pre-sized vector would be an out-of-bounds write below.
        CHECK_WITH_LOG(docSearchInfo.GetDocId() < urls.size());
        urls[docSearchInfo.GetDocId()] = docSearchInfo.GetUrl();
    }

    INFO_LOG << "Restoring " << path << Endl;
    TFile2DArrayWriter<ui32, char> writer(path);
    for (size_t docId = 0; docId < urls.size(); ++docId) {
        // NewLine() is called between rows only, i.e. not before the very first row.
        if (docId) {
            writer.NewLine();
        }
        // std::as_const forces const iteration over the string.
        for (auto&& c : std::as_const(urls[docId])) {
            writer.Write(c);
        }
    }
    writer.Finish();
    INFO_LOG << "Completely restored " << path << Endl;

    // Read the file back and make sure it reproduces exactly what was collected.
    TDocUrlIndexReader reader(path);
    AssertCorrectIndex(reader.Size() == urls.size(), "created incorrect index.docurl");
    for (size_t docId = 0; docId < urls.size(); ++docId) {
        // docId is size_t, so the matching printf conversion is %zu (not %d).
        AssertCorrectIndex(urls[docId] == reader.Get(docId), "invalid index.docurl for id %zu", docId);
    }
    INFO_LOG << "Validated " << path << Endl;
}
