#include "searcher.h"
#include <saas/rtyserver/config/searcher_config.h>
#include <saas/rtyserver/key_inv_processor/ki_maker.h>
#include <kernel/qtree/richrequest/richnode.h>
#include <kernel/qtree/request/req_node.h>
#include <kernel/qtree/request/reqscan.h>
#include <kernel/web_factors_info/factor_names.h>
#include <ysite/yandex/posfilter/filtration_model.h>
#include <ysite/yandex/posfilter/tr_iterator.h>
#include <ysite/yandex/posfilter/filter_tree.h>
#include <library/cpp/token/charfilter.h>
#include <library/cpp/digest/md5/md5.h>

// Builds a searcher for one suggest request.
//   manager       - suggest index manager; supplies the index reader, the RTY config and suggest records.
//   cgi           - CGI parameters of the request (read later by PrepareSearch and the Parse* helpers).
//   reportBuilder - sink for the found documents and for error messages.
// Defaults set here: Numdoc is 10, PruneCount is the suggest component's configured
// default prune count, no rank model is selected yet, and all flags are off.
TSuggestSearcher::TSuggestSearcher(const TSuggestIndexManager& manager, const TCgiParameters& cgi, ICustomReportBuilder& reportBuilder)
    : Manager(manager)
    , Cgi(cgi)
    , ReportBuilder(reportBuilder)
    , HitsLoader(Manager.GetReader().GetYR()->YMain())
    , Numdoc(10)
    , PruneCount(Manager.GetRTYConfig().ComponentsConfig.Get<TSuggestComponentConfig>(SUGGEST_COMPONENT_NAME)->GetDefaultPruneCount())
    , RankModel(nullptr)
    , Wildcard(false)
    , GtaJsonFactors(false)
    , FsgtaJsonFactors(false)
{}

void TSuggestSearcher::PrepareSearch() {
    TryFromString<ui32>(Strip(Cgi.Get("numdoc")), Numdoc);
    Found.reserve(Numdoc);
    ParseRequest();
    if (NormalizedRequest.empty())
        return;
    ParseKpses();
    ParseRelev();
    ParsePron();
    SuggestFactorsCalcer.Init(Manager.GetRTYConfig(), *RankModel, NormalizedRequest.size());
    GtaJsonFactors = Cgi.Has("gta", "_JsonFactors");
    FsgtaJsonFactors = Cgi.Has("fsgta", "_JsonFactors");
}

void TSuggestSearcher::ParseKpses() {
    if (!Cgi.Has("sgkps"))
        return;
    TVector<TString> kpsess = SplitString(Cgi.Get("sgkps"), ",");
    Kpses.reserve(kpsess.size());
    for (const auto& kps : kpsess) {
        try {
            Kpses.push_back(FromString<ui64>(Strip(kps)));
        }
        catch (...) {
            ReportBuilder.AddErrorMessage("invalid sgkps " + kps + ": " + CurrentExceptionMessage());
        }
    }
}

void TSuggestSearcher::ParseRelev() {
    for (size_t i = 0, e = Cgi.NumOfValues("relev"); i < e && !RankModel; ++i) {
        const TString& val = Cgi.Get("relev", i);
        TVector<TString> values = SplitString(val, ";");
        for (const auto& p : values) {
            const TVector<TString> parts = SplitString(p, "=");
            if (parts.size() < 1)
                continue;
            else if (parts.size() < 2)
                continue;
            else if (parts[0] == "formula")
                RankModel = Manager.GetRTYConfig().GetSearcherConfig().Factors->GetRankModel(parts[1]);
        }
    }
    if (!RankModel)
        RankModel = Manager.GetRTYConfig().GetSearcherConfig().Factors->GetRankModel("");
    CHECK_WITH_LOG(RankModel);
    FactorsCount = RankModel->GetUsedFactors().empty() ? 0 : (*RankModel->GetUsedFactors().rbegin() + 1);
}

void TSuggestSearcher::ParsePron() {
    TSet<TString> pron;
    for (size_t i = 0; i < Cgi.NumOfValues("pron"); ++i) {
        const TVector<TString> onePron = SplitString(Cgi.Get("pron", i), ";");
        pron.insert(onePron.begin(), onePron.end());
    }

    if (pron.empty())
        return;

    TSet<TString>::const_iterator i = pron.lower_bound("pruncount");
    if (i != pron.end() && i->StartsWith("pruncount"))
        TryFromString<ui32>(i->c_str() + 9, PruneCount);
}

// Entry point: parses the request, runs the index scan when the request
// contains at least one word, and emits the report.
ERTYSearchResult TSuggestSearcher::Search() {
    PrepareSearch();
    if (NormalizedRequest.empty()) {
        // Nothing to look up; Report() handles the empty result set.
        return Report();
    }
    AdjustTopAndArgs();
    ProcessLeafs();
    return Report();
}

// Converts every entry of Found into a document and hands it to ReportBuilder.
// Returns SR_NOT_FOUND when there is nothing to report, SR_OK otherwise.
ERTYSearchResult TSuggestSearcher::Report() {
    if (Found.empty()) {
        return SR_NOT_FOUND;
    }

    const bool needJsonFactors = GtaJsonFactors || FsgtaJsonFactors;
    for (const auto& found : Found) {
        NMetaProtocol::TDocument doc;
        const TString& title = found.Suggest;

        // The suggest text serves both as the url and as the title of the document.
        auto* archiveInfo = doc.MutableArchiveInfo();
        archiveInfo->SetUrl(title);
        archiveInfo->SetTitle(title);

        // Serialize the factors once and reuse the string for both attribute kinds.
        TString jsonFactors;
        if (needJsonFactors) {
            jsonFactors = GetJsonFactors(found);
        }
        if (FsgtaJsonFactors) {
            NMetaProtocol::TPairBytesBytes* firstStageAttr = doc.AddFirstStageAttribute();
            firstStageAttr->SetKey("_JsonFactors");
            firstStageAttr->SetValue(jsonFactors);
        }
        if (GtaJsonFactors) {
            NMetaProtocol::TPairBytesBytes* gtaAttr = archiveInfo->AddGtaRelatedAttribute();
            gtaAttr->SetKey("_JsonFactors");
            gtaAttr->SetValue(jsonFactors);
        }

        doc.SetDocId(MD5::Calc(title));
        doc.SetRelevance(found.Relevance * (1 << 23));
        ReportBuilder.AddDocument(doc);
    }
    return SR_OK;
}

void TSuggestSearcher::ParseRequest() {
    VERIFY_WITH_LOG(!NormalizedRequest, "request is parsed twice");
    const TString& text = Cgi.Get("text");
    TVector<TStringBuf> words;
    Split(text.data(), " ", words);
    for (const auto& w : words)
        NormalizedRequest.push_back(NormalizeUnicode(UTF8ToWide(w)));
    Wildcard = !text.EndsWith(' ');
}

// Builds the request tree for NormalizedRequest and fills TopAndArgs from it.
// Each token becomes a word node; nodes are chained left-to-right with a
// PHRASE_PHRASE binary operation. Throws yexception if GenerateTopAndArgs
// does not return LR_OK.
// NOTE(review): `node` stays nullptr when NormalizedRequest is empty and is
// dereferenced unconditionally below; Search() only calls this function for a
// non-empty request.
void TSuggestSearcher::AdjustTopAndArgs() {
    TLangMask langMask;
    TLanguageContext langContext(langMask);
    TCreateTreeOptions cto(langContext);
    cto.NoLemmerNodes = true;
    cto.Reqflags = PRF_WILDCARDS | RPF_NUMBERS_STYLE_NO_ZEROS;
    // GC creates every request node below; the raw node pointers are not freed here.
    TRequestNode::TGc GC;
    TRequestNode* node = nullptr;
    TLangSuffix langSuffix = DefaultLangSuffix;
    langSuffix.OpInfo = DefaultPhraseOpInfo;
    for (ui32 i = 0; i < NormalizedRequest.size(); ++i) {
        const TUtf16String& token = NormalizedRequest[i];
        TRequestNode* newNode = GC.CreateWordNode(TLangEntry(token.data()), TWideToken(token.data(), token.size()));
        // Only the last token may be an unfinished word, so only it is matched as a prefix.
        if (Wildcard && i == NormalizedRequest.size() - 1)
            newNode->SetWildCard(WILDCARD_PREFIX);
        if (node)
            node = GC.CreateBinaryOper(node, newNode, DefaultLangPrefix, langSuffix, PHRASE_PHRASE);
        else
            node = newNode;
    }
    // Apply the key prefixes parsed from "sgkps" to the tree.
    node->UpdateKeyPrefix(Kpses);
    TRichTreePtr rtp = CreateRichTreeFromBinaryTree(node, cto).Release();
    rtp->Root->SetAttrIntervalRestriction(1000000);

    if (LR_OK != GenerateTopAndArgs(&HitsLoader, &HitsLoader, nullptr, *rtp->Root, nullptr,
                                    &TopAndArgs, nullptr, TGenerateTopAndArgOptions(), GetWebFactorsInfo()))
        throw yexception() << "Cannot generate topAndArgs";

}

// Scores the suggest record stored at `erfAddress` and offers it to the
// bounded result set `Found`.
//   erfAddress - address of the suggest record, resolved through Manager.
//   hits       - hits of the request words inside this suggest, used for factor calculation.
// Records whose count is zero are ignored and do not consume the prune budget.
// Every scored record consumes one unit of PruneCount.
//
// `Found` holds at most Numdoc entries. Once it is full it is maintained as a
// heap (ordering defined by TFoundSuggest's comparison) whose front is the
// entry that gets replaced when a more relevant candidate arrives.
void TSuggestSearcher::AddFound(ui64 erfAddress, const THits& hits) {
    TSuggestRecord sr = Manager.GetRecord(erfAddress);
    if (!sr.GetCount())
        return;
    TFoundSuggest newFound(sr.GetSuggest(), FactorsCount);
    SuggestFactorsCalcer.CalcFactors(newFound.Factors, sr, hits);
    RankModel->MultiCalc(&newFound.Factors.factors, &newFound.Relevance, 1);
    if (Found.size() < Numdoc) {
        Found.push_back(newFound);
        if (Found.size() == Numdoc)
            MakeHeap(Found.begin(), Found.end());
    } else if (!Found.empty() && newFound.Relevance > Found.front().Relevance) {
        // Found.empty() here means Numdoc == 0 (e.g. "numdoc=0" in the request):
        // there is no slot to replace, and Found.front() would be undefined behaviour.
        PopHeap(Found.begin(), Found.end());
        Found.back() = newFound;
        PushHeap(Found.begin(), Found.end());
    }
    // PruneCount is unsigned; never let it wrap around past zero, which would
    // effectively disable pruning.
    if (PruneCount)
        --PruneCount;
}

namespace {
    // Filtration model stub passed to CreateTRIterator: it assigns zero text
    // weight to everything.
    class TDummyModel : public IFiltrationModel {
    private:
        TString GetCaption() const override {
            return "Dummy";
        }

        float GetTextWeight(const TSkobkaInfo&, size_t, bool) const override {
            return 0.f;
        }
    };

    // Capacity of the per-document hit buffer: 100 hits per suggest, times the
    // 1 << 6 values of the low 6 bits of an erf address, which are taken from
    // the hit (see ProcessLeafs).
    constexpr ui32 MAX_HITS_PER_DOCID = 100 * (1 << 6);
}
// Tracks the contiguous run of hits that belong to one candidate suggest and
// counts how many distinct request words occur among them.
// The storage does not own the hits: it only keeps a [Begin, End) pointer
// range into the caller's hit array, so hits must be pushed in array order.
class TSuggestSearcher::THitsStorage {
public:
    // queryWordsCount - number of words in the request; every hit.WordIdx
    // passed to PushHit() is expected to be below this value (not checked).
    inline explicit THitsStorage(ui32 queryWordsCount)
        : Hits()
        , FoundWords(queryWordsCount)
        , FoundWordsCount(0)
    {}

    // Starts a new run. With a non-null `firstHint` the run initially consists
    // of that single hit; with nullptr the run is empty.
    inline void Reset(const TFullPositionEx* firstHint) {
        Hits.Begin = firstHint;
        // Keep the range well-formed (empty) even when there is no first hit,
        // instead of leaving End pointing into the previous run.
        Hits.End = firstHint;
        FoundWords.assign(FoundWords.size(), false);
        FoundWordsCount = 0;
        if (firstHint)
            PushHit(*firstHint);
    }

    // Current run of hits.
    inline const TSuggestSearcher::THits& GetHits() const {
        return Hits;
    }

    // Number of distinct request words seen in the current run.
    inline ui32 GetFoundQueryWordsCount() const {
        return FoundWordsCount;
    }

    // Extends the run so that it ends right after `hit` and records the
    // request word the hit belongs to.
    inline void PushHit(const TFullPositionEx& hit) {
        Hits.End = &hit + 1;
        if (!FoundWords[hit.WordIdx]) {
            FoundWords[hit.WordIdx] = true;
            ++FoundWordsCount;
        }
    }
private:
    TSuggestSearcher::THits Hits;
    TVector<bool> FoundWords;
    ui32 FoundWordsCount;
};

void TSuggestSearcher::ProcessLeafs() {
    THolder<ITRIterator> trIterator(CreateTRIterator(TopAndArgs, Singleton<TDummyModel>(), false, ui32(-2), true));
    TNextDocSwitcher::TResult switchResult;
    TNextDocSwitcher switcher(trIterator.Get(), nullptr);
    ui64 mask = 0;
    TFullPositionEx res[MAX_HITS_PER_DOCID];
    THitsStorage histStorage(NormalizedRequest.size());
    while (PruneCount && switcher.Switch(&switchResult)) {
        ui32 count = trIterator->GetAllDocumentPositions(switchResult.DocId, &res[0], MAX_HITS_PER_DOCID, mask);
        ui32 lastBreak = Max<ui32>();
        histStorage.Reset(nullptr);
        for (ui32 i = 0; PruneCount && i < count; ++i) {
            ui32 br = TWordPosition::Break(res[i].Pos.Beg);
            if (lastBreak == br) {
                histStorage.PushHit(res[i]);
            } else {
                if (histStorage.GetFoundQueryWordsCount() == NormalizedRequest.size())
                    AddFound((switchResult.DocId << 6) + lastBreak, histStorage.GetHits());
                histStorage.Reset(&res[i]);
                lastBreak = br;
            }
        }
        if (PruneCount && histStorage.GetFoundQueryWordsCount() == NormalizedRequest.size())
            AddFound((switchResult.DocId << 6) + lastBreak, histStorage.GetHits());
    }
    StableSort(Found.begin(), Found.end());
}

// Serializes the factors used by the current rank model into a JSON map of
// factor name -> factor value for the given found suggest.
TString TSuggestSearcher::GetJsonFactors(const TFoundSuggest& record) const {
    const auto& factorsConfig = Manager.GetRTYConfig().GetSearcherConfig().Factors;

    NJson::TJsonValue json(NJson::JSON_MAP);
    for (const ui32 usedIndex : RankModel->GetUsedFactors()) {
        json.InsertValue(factorsConfig->GetFactorName(usedIndex), record.Factors.factors[usedIndex]);
    }
    return json.GetStringRobust();
}

// Nothing to release explicitly; members clean up after themselves.
TSuggestSearcher::~TSuggestSearcher() = default;
