#include "builder.h"
#include "const.h"
#include "component.h"
#include "manager.h"

#include <saas/rtyserver/common/should_stop.h>
#include <saas/rtyserver/components/erf/erf_disk.h>
#include <saas/rtyserver/components/erf/erf_writer.h>
#include <saas/rtyserver/indexer_core/indexer_core.h>
#include <saas/rtyserver/key_inv_processor/ki_maker.h>

#include <kernel/keyinv/invkeypos/keyconv.h>
#include <yweb/config/pruning.h>

#include <library/cpp/ext/sizer.h>
#include <library/cpp/logger/global/global.h>
#include <library/cpp/token/charfilter.h>
#include <library/cpp/tokenizer/tokenizer.h>

namespace {
    // Capacity (in TChar units) of the buffer that receives a normalized token.
    // The converted key is asserted to be at most 4 * MaxKeyLength + 1 bytes long.
    constexpr size_t MaxKeyLength = 100;

    // Number of low bits of the erf position stored in the "break" field of a
    // word position; the remaining high bits are stored in the "doc" field.
    constexpr ui32 ErfPosBreakBits = 6;

    // Token handler that accumulates key -> positions for a single record and
    // hands them to a TRTYKIMaker.
    //
    // Usage: call Next() before tokenizing each record. Positions collected for
    // the previous record are flushed by the following Next() call or by the
    // destructor.
    struct TWordsTokenizer: public ITokenHandler {
    private:
        // Defaults keep OnToken() well defined even if it runs before Next().
        ui32 ErfPos = 0;
        ui32 WordNum = 0;
        ui64 Kps = 0;
        TMap<TString, TVector<SUPERLONG>> KeyPos;

        TFormToKeyConvertor Convertor;
        TRTYKIMaker* KIMaker;

    public:
        // kiMaker is not owned; it is dereferenced in Flush() (including from the
        // destructor) whenever positions are pending.
        explicit TWordsTokenizer(TRTYKIMaker* kiMaker)
            : KIMaker(kiMaker)
        {}

        ~TWordsTokenizer() {
            Flush();
        }

        // Flushes whatever was collected so far and starts a new record with the
        // given erf position and key prefix.
        void Next(ui32 erfPos, ui64 kps) {
            Flush();
            WordNum = 0;
            ErfPos = erfPos;
            Kps = kps;
        }

        // Stores all collected positions and advances the maker by one document.
        // Does nothing when no token has been collected since the last flush.
        void Flush() {
            if (KeyPos.empty()) {
                return;
            }
            // Iterate by reference: copying each entry would duplicate the key
            // string and the whole position vector.
            for (auto& word : KeyPos) {
                // Positions of one key must be non-decreasing.
                SUPERLONG current = 0;
                for (auto position : word.second) {
                    CHECK_WITH_LOG(current <= position);
                    current = position;
                }
                KIMaker->StorePositions(0, word.first.data(), word.second.data(), word.second.size());
            }
            KeyPos.clear();
            KIMaker->IncDoc(0);
        }

        // Collects one position for every word, integer, float or mark token;
        // tokens of any other type are ignored.
        void OnToken(const TWideToken& token, size_t /*origleng*/, NLP_TYPE type) override {
            const bool indexable = type == NLP_WORD || type == NLP_INTEGER || type == NLP_FLOAT || type == NLP_MARK;
            if (!indexable) {
                return;
            }

            size_t len = 0;
            TChar buffer[MaxKeyLength];
            const size_t n = NormalizeUnicode(token.Token, token.Leng, buffer, MaxKeyLength);
            const char* raw = Convertor.Convert(buffer, n, len);
            CHECK_LEQ_WITH_LOG(len, 4 * MaxKeyLength + 1);
            TString key(raw, len);

            // A non-zero Kps is prepended to the key as 16 hex digits and '?'.
            if (Kps) {
                key = Ui64ToHex(Kps, 16) + "?" + key;
            }

            SUPERLONG pos = 0;
            const ui32 currentDocId = ErfPos >> ErfPosBreakBits;
            TWordPosition::SetDoc(pos, currentDocId);
            TWordPosition::SetBreak(pos, ErfPos & ((1 << ErfPosBreakBits) - 1));
            TWordPosition::SetWord(pos, WordNum);
            // The word counter saturates at WORD_LEVEL_Max.
            if (WordNum < WORD_LEVEL_Max) {
                WordNum++;
            }
            KeyPos[key].push_back(pos);
        }
    };
}

// Creates a suggest index builder.
//
// dirConfig     - supplies TempDir, under which the "suggest_tmp_sort_" sorter
//                 path prefix is formed.
// rtyConfig     - server config; forwarded to the base builder and used to look
//                 up the suggest component config.
// componentName - forwarded to the base builder.
TSuggestIndexBuilder::TSuggestIndexBuilder(const TDirConf& dirConfig, const TRTYServerConfig& rtyConfig, const TString& componentName)
    : TBaseGeneratorBuilder(rtyConfig, componentName)
    , Config(rtyConfig.ComponentsConfig.Get<TSuggestComponentConfig>(SUGGEST_COMPONENT_NAME))
    // NOTE(review): the arguments look like a page size and a temp-file path
    // prefix - confirm against TSuggestExtSort.
    , SuggestSimple(new TSuggestExtSort(ExtSortPageSize, dirConfig.TempDir + "/suggest_tmp_sort_"))
{
    // The builder cannot work without the suggest component config.
    CHECK_WITH_LOG(Config);
}

bool TSuggestIndexBuilder::CloseImpl(const TString& destPath, const TVector<ui32>* remap, const std::atomic<bool>* stopFlag) {
    NOTICE_LOG << "Suggest index building start... for " << destPath << Endl;

    THolder<TExtSort<TSuggestRecord, TSuggestRecord::TLessByFactors>> resortByRelev;
    resortByRelev.Reset(new TExtSort<TSuggestRecord, TSuggestRecord::TLessByFactors>(1 << 25, destPath + "/suggest_tmp_sort_relev_"));

    TSuggestRecord predRecord;
    {
        for (SuggestSimple->Rewind();; SuggestSimple->Advance()) {
            const TSuggestRecord* record = SuggestSimple->AtEnd() ? nullptr : &SuggestSimple->Current();
            if (ShouldStop(stopFlag))
                return false;
            bool doMerge = predRecord.PossibleForMerge(record);
            if (remap && record && record->GetDocId() != TSuggestRecord::NoDocId) {
                if (remap->size() > record->GetDocId() && (*remap)[record->GetDocId()] == REMAP_NOWHERE)
                    continue;
            }
            if (doMerge)
                predRecord.Merge(*record);
            if (!doMerge) {
                if (!!predRecord.GetSuggest()) {
                    predRecord.SuggestLowerCase();
                    predRecord.ResetDocId();
                    resortByRelev->PushBack(predRecord);
                }
                if (record)
                    predRecord = *record;
            }
            if (!record)
                break;
        }
    }
    SuggestSimple.Destroy();

    THolder<TExtSort<TSuggestRecord, TSuggestRecord::TLessByText>> packedTextSort;
    packedTextSort.Reset(new TExtSort<TSuggestRecord, TSuggestRecord::TLessByText>(1 << 25, destPath + "/suggest_tmp_sort_text_"));

    {
        TRTYKIMaker::TPtr kiMaker = new TRTYKIMaker(1, destPath, "index." + NRTYServer::NSuggest::KeyInvName + ".", Config->GetMaxDocumentsPortion());
        kiMaker->Start();
        TRTYErfDiskManager::TCreationContext cc(TPathName{destPath}, NRTYServer::NSuggest::ErfFileName, &FactorsDescription);
        cc.BlockCount = 1024;
        TRTYErfDiskManagerPtr Erf = new TRTYErfDiskManager(cc);
        Erf->Open();
        TErfWriter::TPtr ErfWriter = new TErfWriter(Erf.Get());
        {
            TArchiveWriter<TSuggestRecord> archiveWriter;
            archiveWriter.Open(destPath, NRTYServer::NSuggest::ArchivePrefix);
            TWordsTokenizer tokenizerHandler(kiMaker.Get());
            for (resortByRelev->Rewind(); !resortByRelev->AtEnd(); resortByRelev->Advance()) {
                if (ShouldStop(stopFlag))
                    break;
                TSuggestRecord record = resortByRelev->Current();
                ui32 erfPosition = ErfWriter->Write(record.GetFactors());
                record.ClearFactors();
                tokenizerHandler.Next(erfPosition, record.GetKps());
                TNlpTokenizer tokenizer(tokenizerHandler);
                try {
                    tokenizer.Tokenize(UTF8ToWide(record.GetSuggest()));
                    record.MutableDocId() = erfPosition;
                    archiveWriter.Write(record);
                    packedTextSort->PushBack(record);
                } catch (...) {
                    ERROR_LOG << "Tokenization failed for " << record.GetSuggest() << " " << CurrentExceptionMessage() << Endl;
                }
            }
            archiveWriter.Close();
        }
        Erf->Close();
        kiMaker->Stop();
        kiMaker->Close(nullptr);
    }

    resortByRelev.Destroy();

    {
        TArchiveWriter<TSuggestRecord> archiveWriter;
        archiveWriter.Open(destPath, NRTYServer::NSuggest::SortedArchivePrefix);
        for (packedTextSort->Rewind(); !packedTextSort->AtEnd(); packedTextSort->Advance()) {
            if (ShouldStop(stopFlag))
                break;
            const TSuggestRecord& record = packedTextSort->Current();
            archiveWriter.Write(record);
        }
        archiveWriter.Close();
    }

    packedTextSort.Destroy();

    NOTICE_LOG << "Suggest index building finished for " << destPath << Endl;
    return !ShouldStop(stopFlag);
}

// Builds the index in the destination directory of the close context, using the
// remap table the context carries. Always reports success.
bool TSuggestIndexBuilder::DoClose(const NRTYServer::TBuilderCloseContext& context) {
    const TString& destination = context.DstDir.PathName();
    const TVector<ui32>* const remapTable = context.RemapTable;
    // The result of CloseImpl is intentionally not propagated here.
    CloseImpl(destination, remapTable);
    return true;
}

void TSuggestIndexBuilder::Index(const TSuggestRecord& record) {
    DEBUG_LOG << "Add for index suggest: " << record.GetSuggest() << Endl;
    SuggestSimple->PushBack(record);
}

// Adds every suggest sentence of a parsed document to the accumulating sorter,
// tagging each record with docId, then forwards the document to the base builder.
//
// threadID - forwarded to the base builder.
// document - parsed document; its suggest component entity is read.
// docId    - document id written into every produced record.
//
// Any exception raised while processing the document is treated as fatal.
void TSuggestIndexBuilder::Index(int threadID, const TParsedDocument& document, const ui32 docId) {
    TGuard<TMutex> g(Mutex);
    try {
        const TSuggestParsedEntity* info = document.GetComponentEntity<TSuggestParsedEntity>(SUGGEST_COMPONENT_NAME);
        // Dereferencing a missing entity would be undefined behaviour that the
        // catch below cannot intercept, so fail explicitly with the URL instead.
        CHECK_WITH_LOG(info) << "ProcessDoc failed: no suggest entity, URL = " + document.GetDocSearchInfo().GetUrl() << Endl;
        for (ui32 i = 0; i < info->GetInfoBySentence().CountSents(); ++i) {
            // Work on a copy: the doc id is patched before the record is stored.
            auto record = info->GetInfoBySentence().SentInfo(i);
            DEBUG_LOG << "Add for index suggest: " << record.GetSuggest() << Endl;
            record.MutableDocId() = docId;
            SuggestSimple->PushBack(record);
        }
    } catch (...) {
        CHECK_WITH_LOG(false) << "ProcessDoc failed: URL = " + document.GetDocSearchInfo().GetUrl() + " what = " + CurrentExceptionMessage() << Endl;
    }
    TBaseGeneratorBuilder::Index(threadID, document, docId);
}
