#include "fresh_pruning_builder.h"

#include <robot/library/oxygen/indexer/idspace/docid.h>
#include <robot/library/oxygen/base/sr_utils/sr_utils.h>

#include <library/cpp/uri/uri.h>
#include <library/cpp/logger/global/global.h>
#include <util/folder/path.h>
#include <util/generic/algorithm.h>
#include <library/cpp/string_utils/url/url.h>

// Creates an accessor that keeps one rank column per name in rankNames.
// The name list is taken over from the caller; the VERIFY_WITH_LOG check
// fails when the list is empty.
TPruningDataAccessor::TPruningDataAccessor(TVector<TString> &&rankNames)
    // A named rvalue reference is an lvalue, so std::move is required here;
    // without it the whole vector of names is copied.
    : RankNames(std::move(rankNames))
{
    VERIFY_WITH_LOG(RankNames.size(), "One or more ranks should be defined");
    // One (initially empty) column of rank values per rank name.
    Ranks.resize(RankNames.size());
}

// Pre-allocates capacity for nEntries documents in the entry storage, in the
// docId-to-index table and in every per-rank column.
void TPruningDataAccessor::Reserve(ui32 nEntries) {
    Data.reserve(nEntries);
    DocId2Index.reserve(nEntries);
    for (size_t column = 0; column < Ranks.size(); ++column) {
        Ranks[column].reserve(nEntries);
    }
}

// Registers one document together with its rank values.
//
// tmpDocId   - temporary document id; used as the key of the docId-to-index table.
// url        - document URL; it is stored in the form returned by InverseDomainsInUrl.
// rankValues - rank values, appended to the rank columns in order (value k goes to column k).
//
// Returns false (and stores nothing) when InverseDomainsInUrl yields no value,
// true once the entry has been appended.
//
// NOTE(review): the check below accepts FEWER values than there are rank
// columns, although its message reads "Not enough elements". In that case the
// remaining columns receive no value for this entry - confirm that this is intended.
bool TPruningDataAccessor::AddEntry(ui32 tmpDocId, TStringBuf url, const TRankValues & rankValues) {
    VERIFY_WITH_LOG(rankValues.size() <= Ranks.size(), "Not enough elements in TRankList");

    TMaybe<TString> invertedUrl = NOxygen::TPruningBuilder::InverseDomainsInUrl(url);
    if (invertedUrl.Empty()) {
        return false;
    }

    // Position the new entry is going to occupy in Data.
    const size_t slot = Data.size();
    Data.emplace_back();
    auto& added = Data[slot];
    added.TmpDocId = tmpDocId;
    added.InvertedUrl = invertedUrl.GetRef();

    // Grow the lookup table on demand so that it can be indexed by tmpDocId.
    if (DocId2Index.size() <= tmpDocId) {
        DocId2Index.resize(tmpDocId + 1);
    }
    DocId2Index[tmpDocId] = slot;

    size_t column = 0;
    for (auto value : rankValues) {
        Ranks[column].push_back(value);
        ++column;
    }

    return true;
}

// Returns the TmpDocId of every stored entry, in storage order.
TVector<ui32> TPruningDataAccessor::GetTmpDocIds() {
    TVector<ui32> ids;
    ids.reserve(Data.size());
    for (const auto& entry : Data) {
        ids.push_back(entry.TmpDocId);
    }
    return ids;
}

// Returns the inverted URL stored for the entry that GetByDocId finds for docId.
TString TPruningDataAccessor::GetInvertedUrl(ui32 docId) {
    const auto& entry = GetByDocId(docId);
    return entry.InvertedUrl;
}

// Returns the RegionalSelectionRank of the entry that GetByDocId finds for docId.
float TPruningDataAccessor::GetMainRank(ui32 docId) {
    const auto& entry = GetByDocId(docId);
    const float mainRank = entry.RegionalSelectionRank;
    return mainRank;
}

// Collects every rank of one document as (name, value) pairs.
//
// docId - document id, resolved to an internal index through GetIndex.
//
// Returns one TPruningRank per rank column, in column order: Name comes from
// RankNames and Value from GetRank for the resolved index.
NOxygen::TPruningRanks TPruningDataAccessor::GetAllRanks(ui32 docId) {
    // The index does not depend on the rank column, so resolve it once
    // instead of repeating the lookup on every iteration.
    const auto index = GetIndex(docId);

    NOxygen::TPruningRanks result;
    result.reserve(Ranks.size());
    for (ui32 nSource = 0; nSource < Ranks.size(); ++nSource) {
        NOxygen::TPruningRank rankValue;
        rankValue.Name = RankNames[nSource];
        rankValue.Value = GetRank(index, nSource);
        result.push_back(std::move(rankValue));
    }
    return result;
}

// Returns the number of rank columns.
ui32 TPruningDataAccessor::GetRanksNum() const {
    return static_cast<ui32>(Ranks.size());
}



// Creates a builder over the given data accessor.
//
// pruningConfig - forwarded to the NOxygen::TPruningBuilder base.
// zoneParams    - source of the per-zone proportions; the list must not be empty.
// data          - accessor with the documents and their ranks; kept by reference.
//
// The proportion list is resized to the number of ranks in data, so extra
// proportions are dropped and missing ones are value-initialized.
TFreshPruningBuilder::TFreshPruningBuilder(const NOxygen::TPruningConfig& pruningConfig, const TPruningZoneParams& zoneParams, TPruningDataAccessor& data)
    : NOxygen::TPruningBuilder(pruningConfig, data)
    , Proportions(zoneParams.Proportions)
    , Data(data)
{
    VERIFY_WITH_LOG(Proportions.size(), "TPruningZoneParams should be provided");

    // resize() leaves the container untouched when the size already matches,
    // so no explicit comparison is needed.
    const auto ranksNum = data.GetRanksNum();
    Proportions.resize(ranksNum);
}

// Writes the main rank of every entry of PruningEntryList to out, in list
// order, as the raw bytes of a float.
void TFreshPruningBuilder::DumpSln(IOutputStream* out) const {
    for (const auto& entry : PruningEntryList) {
        const float mainRank = Data.GetMainRank(entry.TmpDocId);
        out->Write(&mainRank, sizeof(float));
    }
}

// Computes the final order of the documents in pruningEntryList.
//
// Steps:
//   1. For every zone, build an ordering of all entries by that zone's
//      selection rank (descending; ties broken by inverted URL, descending).
//   2. Merge the per-zone orderings round-robin: on each pass zone z
//      contributes up to Proportions[z] entries that are not yet taken and
//      whose rank for z is defined.
//   3. Entries that no zone picked get MinPossibleRank and are appended at
//      the end.
//   4. Reorder the list by the assigned sequence, call CalculateGroups, and
//      renumber FinalDocId consecutively for entries that are not marked as
//      deleted.
//
// pruningEntryList - entries to order; reordered in place, FinalDocId is filled in.
// config           - forwarded to CalculateGroups.
void TFreshPruningBuilder::CalculatePruning(
    NOxygen::TPruningEntryList* pruningEntryList,
    const NOxygen::TPruningConfig& config)
{
    using TZoneSort = TVector<NOxygen::TPruningEntry*>;
    const ui32 nEntries = pruningEntryList->size();
    const ui32 nZones = static_cast<ui32>(Proportions.size());

    TVector<TZoneSort> zoneTiers; //Sort orders for each one of the language tiers
    zoneTiers.resize(nZones);

    TPruningDataArray& impl = Data.GetInternalArray();

    //Sort each tier according to the tier-specific rank
    for (ui32 zone = 0; zone < nZones; zone++) {
        TZoneSort& tier = zoneTiers[zone];
        tier.resize(nEntries);

        for (ui32 i = 0; i < nEntries; ++i) {
            tier[i] = &(*pruningEntryList)[i];
        }

        StableSort(tier.begin(), tier.end(),
            [this, zone](const NOxygen::TPruningEntry* i, const NOxygen::TPruningEntry* j)->bool {
                float rankI = Data.GetSelectionRank(i->TmpDocId, zone);
                float rankJ = Data.GetSelectionRank(j->TmpDocId, zone);
                if (rankI == rankJ)
                    return Data.GetInvertedUrl(i->TmpDocId) > Data.GetInvertedUrl(j->TmpDocId);

                return rankI > rankJ;
            }
        );
    }

    //Merge zone-specific orderings into one sequence
    impl.ResetSelection();

    TVector<ui32> pos(nZones, 0);
    ui32 seqId = 0;

    while (seqId < pruningEntryList->size()) {
        // Set when at least one zone cursor moved during this pass.
        bool progressed = false;

        for (ui32 zone = 0; zone < nZones; zone++) {
            const ui32 start = pos[zone];
            ui32 p = start;
            if (p == nEntries)
                continue;

            ui32 cnt = Proportions[zone];
            for (;cnt && p != nEntries; ++p) {
                NOxygen::TPruningEntry* entry = zoneTiers[zone][p];
                ui32 idx = Data.GetInternalIndex(entry->TmpDocId);

                // Skip if the entry is already included, or if this rank is not applicable
                if (impl.IsBusy(idx) || impl.IsUndefined(idx, zone))
                    continue;

                impl.SetSource(idx, zone); //sets RegionalSelectionRank for an entry

                entry->FinalDocId = seqId++;
                --cnt;
            }
            pos[zone] = p;
            if (p != start)
                progressed = true;
        }

        // No cursor moved: every zone is either exhausted or has a zero
        // proportion, so further passes cannot assign anything. Without this
        // exit the loop would spin forever whenever some entries have no
        // usable rank; such entries are handled by the fallback below.
        if (!progressed)
            break;
    }

    //In the case when there are entries without any rank data, they are included artificially with MinPossibleRank
    static_assert(NOxygen::MinPossibleRank != TPruningDataArray::UndefinedRankValue, "invalid value of a constant");
    for(ui32 i = 0; i < nEntries; i++) {
        NOxygen::TPruningEntry& entry = pruningEntryList->at(i);
        ui32 idx = Data.GetInternalIndex(entry.TmpDocId);
        if (!impl.IsBusy(idx)) {
            impl.Data[idx].RegionalSelectionRank = static_cast<float>(NOxygen::MinPossibleRank);
            entry.FinalDocId = seqId++;
        }
    }

    //Reorder the data
    StableSort(pruningEntryList->begin(), pruningEntryList->end(),
               [](const NOxygen::TPruningEntry& i, const NOxygen::TPruningEntry& j) {
                   return i.FinalDocId < j.FinalDocId;
               }
    );

    // Documents are then split into different groups according to TPruningConfig
    CalculateGroups(pruningEntryList, config);

    // Renumber the surviving documents consecutively; entries whose
    // FinalDocId equals DeletedDocument() keep that marker.
    ui32 finalDocId = 0;
    for (auto& doc : *pruningEntryList) {
        if (doc.FinalDocId != NOxygen::TDocIdMap::DeletedDocument()) {
            doc.FinalDocId = finalDocId++;
        }
    }
}

