#include "processor.h"

#include <search/pruning/file/suffix.h>

#include <kernel/yt/logging/log.h>
#include <robot/library/oxygen/indexer/future/future.h>

#include <library/cpp/logger/global/global.h>

#include <util/digest/city.h>
#include <util/stream/file.h>

#include <algorithm>

using namespace NOxygen;

// Builds a processor that records a URL hash per document and optionally wraps another processor.
//   taskPool     - stored in TaskPool; not used by the methods visible in this part of the file.
//   slave        - nested processor; may be null (GetRequiredTuples/Start/Process/Finish
//                  check it before use).
//   urlTupleName - name of the tuple the URL is read from in Process(); must be non-empty.
//   outputPrefix - when non-empty, Finish() writes ranks to
//                  outputPrefix + NPruningData::SELECTION_SUFFIX.
THashSorterProcessor::THashSorterProcessor(
    const TAtomicSharedPtr<IThreadPool> taskPool,
    const NOxygen::TProcessorPtr slave,
    const TString& urlTupleName,
    const TString& outputPrefix
)
    : TaskPool(taskPool)
    , Slave(slave)
    , TupleNameForUrl(urlTupleName)
    , OutputPrefix(outputPrefix)
{
    // The URL tuple name is mandatory: Process() reads the URL through it.
    VERIFY_WITH_LOG(TupleNameForUrl.size(), "TupleNameForUrl is required");
}

// No manual cleanup is performed here; the destructor body is intentionally empty.
THashSorterProcessor::~THashSorterProcessor() = default;

// Returns the tuples this processor needs: everything the nested processor
// requires (when one is set) plus the tuple that carries the URL.
TTuplesUsageInfo THashSorterProcessor::GetRequiredTuples() const {
    TTuplesUsageInfo required;
    if (Slave) {
        required = Slave->GetRequiredTuples();
    }
    // The URL tuple is always needed because Process() hashes it.
    required.AddTuple(TupleNameForUrl);
    return required;
}

// Forwards the start notification to the nested processor, if there is one.
void THashSorterProcessor::Start() {
    if (!Slave) {
        return;
    }
    Slave->Start();
}

// Records the CityHash64 of the document URL under its temporary id and passes
// the document on to the nested processor (when one is set).
//   objectContext - document context; the URL is read from the tuple named TupleNameForUrl.
//   tmpObjectId   - temporary document id, used as the index into Docs.
// Returns the nested processor's future, or TReturnObjectContext::FutureOk when
// there is no nested processor. The future is waited on before returning so that
// non-indexable documents can be remembered in BadDocs.
TFuture<TReturnObjectContext> THashSorterProcessor::Process(TObjectContext objectContext, ui32 tmpObjectId) {
    const TStringBuf url = objectContext.Get<TStringBuf>(TupleNameForUrl);
    // Grow Docs so that tmpObjectId is a valid index. The "+ 1" is done after
    // widening: in ui32 arithmetic it would wrap to 0 for the maximum id, the
    // resize would be skipped and the store below would go out of bounds.
    if (Docs.size() <= tmpObjectId) {
        Docs.resize(static_cast<size_t>(tmpObjectId) + 1);
    }
    Docs[tmpObjectId] = {
        CityHash64(url),
        tmpObjectId
    };

    const TFuture<TReturnObjectContext> nestedFuture = Slave ? Slave->Process(objectContext, tmpObjectId) : TReturnObjectContext::FutureOk;
    // Block until the nested result is available; it decides whether the
    // document is excluded from the final doc id map.
    const TReturnObjectContext& nestedResult = nestedFuture.GetValue(TDuration::Max());
    if (!nestedResult.IsIndexable()) {
        BadDocs.insert(tmpObjectId);
    }
    return nestedFuture;
}

// Builds the final doc id map from the collected documents, hands it to the
// nested processor (when one is set) and, when OutputPrefix is non-empty,
// dumps the resulting ranks to OutputPrefix + NPruningData::SELECTION_SUFFIX.
//   externalMap - optional map passed through to CreateDocIdMapAsInFinish();
//                 may be null.
// Note: Docs is passed by reference and is sorted in place by the remap step.
void THashSorterProcessor::Finish(const TDocIdMap* externalMap) {
    NTraceUsage::TFunctionScope functionScope(TStringBuf("THashSorterProcessor::Finish"));
    const auto remapResult = CreateDocIdMapAsInFinish(Docs, externalMap);
    if (Slave) {
        Slave->Finish(&remapResult.DocIdMap);
    }
    if (OutputPrefix) {
        TOFStream output(OutputPrefix + NPruningData::SELECTION_SUFFIX);
        // Each rank is written as the raw bytes of a float, in final doc id order.
        for (float rank : remapResult.Ranks) {
            output.Write(&rank, sizeof(rank));
        }
        // Flush and close explicitly: relying on the stream destructor would
        // silently drop any error raised while flushing the buffered data.
        output.Finish();
    }
}

// Reports the version of the nested processor.
// A null nested processor is accepted by the other methods of this class, so it
// is guarded here as well instead of being dereferenced.
// NOTE(review): 0 is returned when there is no nested processor — confirm that
// 0 does not collide with a meaningful version value.
ui64 THashSorterProcessor::GetVersion() const {
    if (!Slave) {
        return 0;
    }
    return Slave->GetVersion();
}

// Reports the version the nested processor associates with the given directory.
//   directory - passed through unchanged to the nested processor.
// A null nested processor is accepted by the other methods of this class, so it
// is guarded here as well instead of being dereferenced.
// NOTE(review): 0 is returned when there is no nested processor — confirm that
// 0 does not collide with a meaningful version value.
ui64 THashSorterProcessor::GetVersion(const TFsPath& directory) const {
    if (!Slave) {
        return 0;
    }
    return Slave->GetVersion(directory);
}

// Computes the doc id map that Finish() would produce, without touching the
// processor state: the remap step sorts its input, so it is given a copy of Docs.
//   externalMap - optional map forwarded to the remap step; may be null.
TDocIdMap THashSorterProcessor::CreateDocIdMapAsInFinish(const TDocIdMap* externalMap) const {
    auto sortableCopy = Docs;
    return CreateDocIdMapAsInFinish(sortableCopy, externalMap).DocIdMap;
}

// Orders the documents by descending hash and assigns consecutive final doc ids
// to the ones that are kept.
//   docs        - documents to remap; sorted in place (stable sort).
//   externalMap - optional; a document mapped there to TDocIdMap::DeletedDocument()
//                 is skipped. May be null.
// Documents listed in BadDocs are skipped as well. For every kept document the
// result holds the tmp id -> final id mapping and the document hash as its rank.
THashSorterProcessor::TRemapResult THashSorterProcessor::CreateDocIdMapAsInFinish(TDocs& docs, const TDocIdMap* externalMap) const {
    const auto byHashDescending = [](const TDoc& lhs, const TDoc& rhs) {
        return lhs.Hash > rhs.Hash;
    };
    std::stable_sort(docs.begin(), docs.end(), byHashDescending);

    const auto isDropped = [this, externalMap](const TDoc& candidate) -> bool {
        // Both checks are always evaluated, external one first.
        bool droppedByCaller = false;
        if (externalMap) {
            droppedByCaller = externalMap->Map(candidate.TmpDocId) == TDocIdMap::DeletedDocument();
        }
        const bool droppedLocally = BadDocs.contains(candidate.TmpDocId);
        return droppedByCaller || droppedLocally;
    };

    TRemapResult remap;
    ui32 nextDocId = 0;
    for (auto&& entry : docs) {
        if (!isDropped(entry)) {
            remap.DocIdMap.SetMapValue(entry.TmpDocId, nextDocId);
            ++nextDocId;
            remap.Ranks.push_back(entry.Hash);
        }
    }
    return remap;
}
