#include "fresh_mixer.h"

#include "fresh_pruning_builder.h"

#include <yweb/protos/export.pb.h>

#include <kernel/yt/logging/log.h>
#include <robot/library/oxygen/indexer/future/future.h>
#include <yweb/robot/urlzones/filters.h>
#include <yweb/robot/urlzones/zone_detection.h>
#include <yweb/config/pruning.h>

#include <search/pruning/file/suffix.h>

#include <library/cpp/logger/global/global.h>
#include <util/generic/maybe.h>
#include <util/stream/file.h>
#include <util/string/escape.h>
#include <library/cpp/string_utils/url/url.h>
#include <iostream>

using namespace NOxygen;


// Builds the mixer from its protobuf options.
//
// taskPool           - pool used by Process() to run the deferred "add entry" tasks.
// slave              - optional downstream processor (may be empty).
// options            - tuple names, source types and the per-language ("region") pruning settings.
// outputPrefix       - stored and later handed to DumpBuilder() in Finish().
// selectionRankStore - forwarded unchanged to TPruningProcessorBase.
//
// Every region from the options is converted into a TZonePruningOpts entry of ZoneOpts.
// The configuration is validated with VERIFY_WITH_LOG: url/language tuple names, the primary
// selection rank and its source are mandatory, the secondary tuple name and the secondary
// selection rank must be either both set or both empty, and at least one region is required.
TFreshMixer::TFreshMixer(
    const TAtomicSharedPtr<IThreadPool> taskPool,
    const NOxygen::TProcessorPtr slave,
    const NOxygen::TRTYMixerOptions &options,
    const TString& outputPrefix,
    bool selectionRankStore
)
        : NOxygen::TPruningProcessorBase(slave, selectionRankStore)
        , TaskPool(taskPool)
        , TupleNameForUrl(options.GetUrlTupleName())
        , TupleNameForMixerLanguage(options.GetLanguageTupleName())
        , OutputPrefix(outputPrefix)
        , PruningConfig(LoadPruningConfig(options))
        , LangDomainFilter(MakeHolder<NZones::TLangDomainSearchZoneFilter>())
        , LangSearchFilter(MakeHolder<NZones::TLangSearchZoneFilter>())
{
    VERIFY_WITH_LOG(TupleNameForUrl.size(), "TupleNameForUrl is required");
    VERIFY_WITH_LOG(TupleNameForMixerLanguage.size(), "TupleNameForMixerLanguage is required");

    // The source types are shared by all regions, so convert them once.
    NOxygen::TPruningProcessor::ESourceType sourceType = ProtoToSourceType(options.GetSourceType());
    NOxygen::TPruningProcessor::ESourceType secondarySourceType = ProtoToSourceType(options.GetSecondarySourceType());

    for (size_t i = 0; i < options.RegionSize(); ++i) {
        const NOxygen::TRTYMixerOptions::TRTYMixerLanguage &lang = options.GetRegion(i);
        ELanguage langId = LanguageByName(lang.GetLanguageRegionName());

        TString rankValuesTupleName = lang.GetRankValuesTupleName();
        TString secondaryRankValuesTupleName = lang.GetSecondaryRankValuesTupleName();
        TString selectionRank = lang.GetSelectionRankName();
        TString secondarySelectionRank = lang.GetSecondarySelectionRankName();
        ui32 proportion = lang.GetProportion();
        bool isRequired = lang.GetIsRequired();

        VERIFY_WITH_LOG(selectionRank.size(), "SelectionRank is required");
        // Secondary pruning needs both the tuple name and the rank name, or neither of them.
        VERIFY_WITH_LOG(!secondaryRankValuesTupleName == !secondarySelectionRank, "Incorrect secondary pruning configuration");

        TZonePruningOpts o = {langId, rankValuesTupleName, secondaryRankValuesTupleName,
                           selectionRank,
                           secondarySelectionRank,
                           TPruningProcessor::BuildSelectionRankSource(selectionRank, sourceType),
                           TPruningProcessor::BuildSelectionRankSource(secondarySelectionRank, secondarySourceType),
                           proportion, isRequired};

        VERIFY_WITH_LOG(o.SelectionRankSource, "SelectionRankSource is required");
        // TryFetchRank() dereferences the secondary source whenever a secondary tuple is configured,
        // so a missing source must be reported here rather than crash while processing documents.
        VERIFY_WITH_LOG(secondarySelectionRank.empty() || !!o.SecondarySelectionRankSource,
                        "SecondarySelectionRankSource is required when secondary pruning is configured");

        ZoneOpts.push_back(std::move(o));
    }

    VERIFY_WITH_LOG(ZoneOpts.size(), "At least one Language options is required");
}

// Produces the pruning config from the group boundaries listed in the options.
// When the options list none, the boundaries returned by GetPruningGroupBoundaries() are used.
NOxygen::TPruningConfig TFreshMixer::LoadPruningConfig(const NOxygen::TRTYMixerOptions &options) {
    const size_t configuredCount = options.GroupBoundariesSize();

    TVector<ui16> boundaries;
    boundaries.reserve(configuredCount);
    for (size_t idx = 0; idx != configuredCount; ++idx) {
        boundaries.push_back(options.GetGroupBoundaries(idx));
    }

    // Nothing configured explicitly: fall back to the default boundaries.
    if (boundaries.empty()) {
        boundaries = GetPruningGroupBoundaries();
    }

    return NOxygen::TPruningConfig{boundaries};
}

// Nothing to release by hand: the destructor is defined here only so that it is emitted in this
// translation unit.
TFreshMixer::~TFreshMixer() = default;

// Builds the per-zone proportions that are passed to the pruning builder.
//
// With exactly one configured zone its proportion is set to the maximal ui32 value.
// Otherwise the configured proportions are copied and then reduced by their common small
// prime factors (2, 3, 5, 7, 11, 13), e.g. 40:60:20 -> 2:3:1.
//
// Returns a TPruningZoneParams whose Proportions has one entry per element of ZoneOpts.
TPruningZoneParams TFreshMixer::BuildZoneParams() const {
    TPruningZoneParams res;
    auto& proportions = res.Proportions;
    proportions.resize(ZoneOpts.size());

    if (ZoneOpts.size() == 1) {
        proportions[0] = std::numeric_limits<ui32>::max();
        return res;
    }

    for (size_t i = 0; i < ZoneOpts.size(); ++i) {
        proportions[i] = ZoneOpts[i].Proportion;
    }

    // Zero is divisible by every factor and stays zero after the division, so an all-zero
    // (or empty) list would keep the reduction loop below spinning forever.
    // There is nothing to simplify in that case, so return the proportions as they are.
    const bool nothingToReduce = std::all_of(proportions.begin(), proportions.end(),
                                             [](ui32 a) { return a == 0; });
    if (nothingToReduce) {
        return res;
    }

    //Simplify the proportion, i.e.:  40:60:20 -> 2:3:1
    for (const ui32 factor : {13u, 11u, 7u, 5u, 3u, 2u}) {
        // Keep dividing while every proportion is a multiple of the factor (handles powers, e.g. 4:8).
        while (std::all_of(proportions.begin(), proportions.end(), [factor](ui32 a) { return a % factor == 0; })) {
            std::transform(proportions.begin(), proportions.end(), proportions.begin(),
                           [factor](ui32 a) { return a / factor; });
        }
    }
    return res;
}

// Collects the tuples this processor reads: everything the slave asks for (when a slave is set),
// the url tuple, the language tuple and the rank-value tuples of every configured zone.
TTuplesUsageInfo TFreshMixer::GetRequiredTuples() const {
    TTuplesUsageInfo tuples;
    if (Slave) {
        tuples = Slave->GetRequiredTuples();
    }

    tuples.AddTuple(TupleNameForUrl);
    tuples.AddTuple(TupleNameForMixerLanguage, true); // not required

    for (const TZonePruningOpts& zoneOpts : ZoneOpts) {
        // The same flag is used for the primary and the secondary tuple of a zone.
        const bool notRequired = !zoneOpts.IsRequired;

        tuples.AddTuple(zoneOpts.RankValuesTupleName, notRequired);
        if (!zoneOpts.SecondaryRankValuesTupleName.empty()) {
            tuples.AddTuple(zoneOpts.SecondaryRankValuesTupleName, notRequired);
        }
    }

    return tuples;
}

void TFreshMixer::Start() {
    if (!!Slave) {
        Slave->Start();
    }

    TVector<TString> rankNames;
    for (const auto& zone : ZoneOpts)
        rankNames.push_back(zone.SelectionRank);

    PruningData.Reset(new ::TPruningDataAccessor(std::move(rankNames)));
}

// Tells whether a document (identified by its host and detected language) falls into the given
// language zone by dispatching to the matching NZones::BelongsTo*Zone predicate.
//
// yweb/robot/urlzones/zone_detection.h has no single entry point for this, so the classification
// here is an approximation whose only purpose is to balance the load among shards.
// An unsupported languageZone ends up in Y_FAIL.
bool TFreshMixer::BelongsToLangZone(ELanguage languageZone, const TString& host, ELanguage docLanguage) const {
    // The NZones predicates are called with a raw character pointer to the host.
    const char* const hostName = host.data();

    switch (languageZone) {
        // Main zones.
        case LANG_RUS:
            return NZones::BelongsToRUSZone(hostName, docLanguage, *LangDomainFilter);
        case LANG_TUR:
            return NZones::BelongsToTURZone(hostName, docLanguage);
        case LANG_ENG:
            return NZones::BelongsToENGZone(docLanguage, *LangSearchFilter);
        case LANG_IND:
            return NZones::BelongsToIndonesianZone(hostName, docLanguage);

        // Remaining zones, listed in the order of the BelongsTo* functions in zone_detection.h.
        // Note: this part is probably not used now.
        case LANG_UKR:
            return NZones::BelongsToUKRZone(hostName, docLanguage);
        case LANG_AZE:
            return NZones::BelongsToAZEZone(hostName, docLanguage);
        case LANG_POL:
            return NZones::BelongsToPOLZone(hostName, docLanguage);

        // These four languages share one zone predicate.
        case LANG_EST:
        case LANG_LIT:
        case LANG_LAV:
        case LANG_RUM:
            return NZones::BelongsToDEVZone(hostName, docLanguage);

        case LANG_KAZ:
            return NZones::BelongsToKZZone(hostName, docLanguage);
        case LANG_GER:
            return NZones::BelongsToGermanZone(hostName, docLanguage);
        case LANG_ITA:
            return NZones::BelongsToItalianZone(hostName, docLanguage);

        default:
            Y_FAIL("Not supported language zone (please update TFreshMixer::BelongsToLangZone() method)");
            return false;
    }
}

// Tries to extract the selection rank of one zone from the object.
//
// The primary rank-values tuple is consulted first. If that gives no rank and the zone has a
// secondary tuple configured, the secondary tuple is consulted as well. Parse failures
// (yexception) are logged and treated as "no rank".
//
// url           - used in log messages only.
// objectContext - the object whose tuples are read.
// zone          - tuple names, rank names and rank sources of the zone.
//
// Returns the rank, or an empty TMaybe (after logging an error) when none could be obtained.
TMaybe<float> TFreshMixer::TryFetchRank(TStringBuf url, NOxygen::TObjectContext& objectContext, const TZonePruningOpts& zone) const {
    // Raw tuple contents are kept around for the diagnostics at the end of the function.
    TStringBuf primaryValues;
    TStringBuf secondaryValues;
    TMaybe<float> rank;

    if (objectContext.Has(zone.RankValuesTupleName)) {
        primaryValues = objectContext.Get<TStringBuf>(zone.RankValuesTupleName);
        try {
            rank = zone.SelectionRankSource->Get(primaryValues);
        } catch (const yexception& e) {
            L_ERROR << "can't parse selection rank(" << zone.SelectionRank << " from " << zone.RankValuesTupleName << ") for url: `"
                    << url << "`:" << e.what();
        }
    }

    // Secondary lookup: only when it is configured, still needed, and the tuple is present.
    if (!!zone.SecondaryRankValuesTupleName && rank.Empty() && objectContext.Has(zone.SecondaryRankValuesTupleName)) {
        secondaryValues = objectContext.Get<TStringBuf>(zone.SecondaryRankValuesTupleName);
        try {
            rank = zone.SecondarySelectionRankSource->Get(secondaryValues);
        } catch (const yexception& e) {
            L_ERROR << "can't parse selection rank(" << zone.SecondarySelectionRank << " from "
                    << zone.SecondaryRankValuesTupleName << ") for url: `"
                    << url << "`:" << e.what();
        }
    }

    if (!rank.Empty()) {
        return rank;
    }

    // Neither source produced a rank: report what was looked for and what was actually there.
    const TString escapedPrimary = EscapeC(primaryValues);
    const TString escapedSecondary = EscapeC(secondaryValues);
    L_ERROR << "Selection rank '" << zone.SelectionRank << "/" << zone.SecondarySelectionRank
            << " from " << zone.RankValuesTupleName << "/" << zone.SecondaryRankValuesTupleName
            << "' was not found for '" << url << "' in rank values '" << escapedPrimary
            << "' / '" << escapedSecondary << "'";

    return rank;
}

// Fills `ranks` (one slot per entry of ZoneOpts) for a single object; called from Process for
// every entry.
//
// A zone slot gets the fetched rank when the document belongs to the zone and a rank is available,
// and TPruningDataArray::UndefinedRankValue otherwise. If the document belongs to no zone at all,
// every zone is tried regardless of membership.
//
// Returns false when a matching zone marked as required has no rank, or when no rank could be
// obtained for any zone; true otherwise. `ranks` must already hold at least ZoneOpts.size() slots.
bool TFreshMixer::FetchRanks(TStringBuf url, TRankValues& ranks, NOxygen::TObjectContext& objectContext) {
    Y_VERIFY(ranks.size() >= ZoneOpts.size(), "TRankValues 'ranks' should be preallocated");

    // A copy of the host, as BelongsToLangZone takes a TString.
    const TString host(GetHost(url));

    ELanguage docLanguage = LANG_UNK;
    if (objectContext.Has(TupleNameForMixerLanguage)) {
        docLanguage = static_cast<ELanguage>(objectContext.Get<ui32>(TupleNameForMixerLanguage));
        Y_VERIFY(docLanguage < LANG_MAX);
    }

    const size_t zoneCount = ZoneOpts.size();
    size_t matchedZones = 0;

    // First pass: only the zones the document belongs to.
    for (size_t idx = 0; idx < zoneCount; ++idx) {
        const TZonePruningOpts& zone = ZoneOpts[idx];

        // Start from "undefined"; it is overwritten below only when a rank is actually found.
        ranks[idx] = TPruningDataArray::UndefinedRankValue;

        if (!BelongsToLangZone(zone.LanguageRegion, host, docLanguage)) {
            continue;
        }
        ++matchedZones;

        const TMaybe<float> rank = TryFetchRank(url, objectContext, zone);
        if (!rank.Empty()) {
            ranks[idx] = rank.GetRef();
        } else if (zone.IsRequired) {
            return false; // the error has been logged by TryFetchRank
        }
    }

    // Plan B: the document matched no zone, so it is offered to every zone that has a rank for it.
    if (matchedZones == 0) {
        for (size_t idx = 0; idx < zoneCount; ++idx) {
            const TMaybe<float> rank = TryFetchRank(url, objectContext, ZoneOpts[idx]);
            if (rank.Empty()) {
                continue;
            }
            ranks[idx] = rank.GetRef();
            ++matchedZones;
        }
    }

    // Still nothing means failure.
    return matchedZones != 0;
}


// Processes one object: fetches its per-zone ranks, forwards the object to the slave (if any) and
// schedules a task on TaskPool that records the ranks in PruningData.
//
// objectContext - the object being processed; the url is read from the TupleNameForUrl tuple.
// tmpObjectId   - temporary id of the object; passed to the slave and to PruningData->AddEntry.
//
// Returns a future with the final result for the object:
//  - a non-indexable SrNotFound error when FetchRanks fails (the slave is not called then);
//  - the slave's own result when that result is not indexable;
//  - a non-indexable BadUrl error when PruningData->AddEntry rejects the entry;
//  - otherwise the slave's result (TReturnObjectContext::FutureOk is used when there is no slave).
TFuture<TReturnObjectContext> TFreshMixer::Process(TObjectContext objectContext, ui32 tmpObjectId) {

    TStringBuf url = objectContext.Get<TStringBuf>(TupleNameForUrl);
    // One rank slot per configured zone; FetchRanks expects the vector to be preallocated.
    TRankValues ranks(ZoneOpts.size());

    bool ok = FetchRanks(url, ranks, objectContext);

    if (!ok) {
        // TODO: test this branch
        return MakeFuture(TReturnObjectContext(
                        TObjectErrorBuilder(EObjectErrorType::SrNotFound)
                                .SetLabel("No rank value")
                                .SetProcessor(GetClassName()),
                        /*writable=*/true,
                        /*identifiable=*/true,
                        /*indexable=*/false));
    }

    TFuture<TReturnObjectContext> ret = Slave ? Slave->Process(objectContext, tmpObjectId) : TReturnObjectContext::FutureOk;
    // The new task depends on the slave result and, if an earlier Process call left a task in
    // AsyncDelay, on that task as well.
    // NOTE(review): presumably this makes the AddEntry calls run one after another in call order - confirm
    // against WaitExceptionOrAll/AsyncWithDeps semantics.
    TFuture<void> combinedDelay = AsyncDelay.Initialized() ? WaitExceptionOrAll(ret.IgnoreResult(), AsyncDelay) : ret.IgnoreResult();

    // The lambda below captures owning copies (url is only a TStringBuf view).
    // NOTE(review): presumably because the task may run after this call has returned - confirm.
    TString urlCopy = TString{url};
    TString className = GetClassName();

    TFuture<TReturnObjectContext> newRet = AsyncWithDeps(
            [this, ret, className, ranks, tmpObjectId, urlCopy] () -> TReturnObjectContext {
                TReturnObjectContext retContext = ret.GetValue();
                if (!retContext.IsIndexable()) {
                    return retContext;
                }
                // NOTE: we want to add document to our table only in case our child processors
                // returned ok
                bool added = PruningData->AddEntry(tmpObjectId, urlCopy, ranks);
                if (!added) {
                    return TReturnObjectContext(
                            TObjectErrorBuilder(EObjectErrorType::BadUrl)
                                    .SetInfo(TStringBuilder() << "Bad url: " << urlCopy)
                                    .SetProcessor(className),
                            /*writable=*/true,
                            /*identifiable=*/true,
                            /*indexable=*/false
                    );
                }
                return retContext;
            },
            *TaskPool,
            combinedDelay
    );
    // Remember this task: the next Process call chains after it and Finish waits for it.
    AsyncDelay = newRet.IgnoreResult(); // TFuture is IDelay

    return newRet;
}

// Creates a pruning builder over the currently collected pruning data, using the stored pruning
// config and the zone parameters from BuildZoneParams().
//
// PruningData exists only between Start() and the end of Finish() (Finish resets it), so calling
// this outside of that window used to dereference a null pointer.
// Throws yexception when there is no pruning data to build from.
THolder<NOxygen::IPruningBuilder> TFreshMixer::CreatePruningBuilder() const {
    if (PruningData.Get() == nullptr) {
        ythrow yexception() << "CreatePruningBuilder shall be called after Start and before the end of Finish";
    }
    return MakeHolder<TFreshPruningBuilder>(PruningConfig, BuildZoneParams(), *PruningData.Get());
}

void TFreshMixer::Finish(const TDocIdMap* externalMap) {
    NTraceUsage::TFunctionScope functionScope(TStringBuf("TFreshMixer::Finish"));
    if (AsyncDelay.Initialized()) {
        AsyncDelay.Wait();
        AsyncDelay = TFuture<void>();
    }

    THolder<IPruningBuilder> pruningBuilder = CreatePruningBuilder();
    pruningBuilder->Finish(externalMap);

    FinalMap = pruningBuilder->GetDocIdMap();
    if (!!Slave) {
        Slave->Finish(FinalMap.Get());
    }

    DumpBuilder(*pruningBuilder, OutputPrefix);

    PruningData.Reset();
}

// Computes the doc id map the same way Finish() does (fresh builder, Finish(externalMap),
// GetDocIdMap()) without storing it in FinalMap, calling the slave or dumping anything.
TDocIdMap TFreshMixer::CreateDocIdMapAsInFinish(const TDocIdMap* externalMap) const {
    THolder<IPruningBuilder> builder = CreatePruningBuilder();

    builder->Finish(externalMap);

    return builder->GetDocIdMap();
}

// Returns the doc id map stored by Finish().
// Throws yexception when no map has been stored yet.
const TDocIdMap& TFreshMixer::GetDocIdMap() const {
    if (!FinalMap.Empty()) {
        return FinalMap.GetRef();
    }
    ythrow yexception() << "GetDocIdMap shall be called after Finish";
}


// Maps the source type from the protobuf options onto the pruning processor's source type.
// Throws yexception for a value that has no counterpart.
NOxygen::TPruningProcessor::ESourceType TFreshMixer::ProtoToSourceType(NOxygen::TRTYMixerOptions::ESourceType protoSourceType) {
    using TOptions = NOxygen::TRTYMixerOptions;
    using ESource = NOxygen::TPruningProcessor::ESourceType;

    if (protoSourceType == TOptions::Walrus) {
        return ESource::Walrus;
    }
    if (protoSourceType == TOptions::Ukrop) {
        return ESource::Ukrop;
    }

    ythrow yexception() << "Unknown pruning source type " << static_cast<int>(protoSourceType);
}

