#include "builder.h"

#include "parsed_entity.h"
#include "const.h"

#include <saas/rtyserver/components/generator/builder.h>
#include <saas/rtyserver/config/const.h>
#include <saas/rtyserver/unistat_signals/signals.h>

#include <saas/rtyserver_jupi/library/tables/tables.h>

#include <robot/jupiter/library/rtdoc/file/model/docidmap.h>
#include <robot/jupiter/library/rtdoc/file/writer.h>
#include <robot/jupiter/protos/calculated_factors.pb.h>

#include <google/protobuf/message.h>

namespace NRTYServer {

// Constructs the builder of the Jupi index component.
//   indexFilesPrefix - stored as Prefix; the prep writer is created from it
//   rtytEnabled      - when true, the RTYT client and lump types are set up immediately
//   config           - RTYT builder configuration; taken by value and moved into RtytConfig
// CypressPrefix starts as "/" and table paths are later built as CypressPrefix + "/" + name.
TJupiBuilder::TJupiBuilder(const TString& indexFilesPrefix, bool rtytEnabled, NFusion::TRTYTBuilderConfig config)
    : IIndexComponentBuilder(JupiComponentName)
    , Prefix(indexFilesPrefix)
    , CypressPrefix("/")
    , Manager(new TJupiComponentManager())
    , RtytEnabled(rtytEnabled)
    , RtytConfig(std::move(config))
{
    // The prep writer is created unconditionally; the RTYT part is optional.
    CreateWriter();
    if (!RtytEnabled) {
        return;
    }
    InitializeRTYT();
}

// Points Writer at a freshly constructed NRtDoc::TPrepWriter built from Prefix.
void TJupiBuilder::CreateWriter() {
    Writer.Reset(new NRtDoc::TPrepWriter(Prefix));
}

// Sets up the RTYT side of the builder: creates the client rooted at the parent
// directory of Prefix and fills PrepTypes with the message types of the lumps
// listed in RtytConfig.GetLumpsToIndex().
// Lumps that are not present in NFusion::GetRTYTPrepTableTypes() are not registered
// (Index() skips lumps without a registered type), and this is logged explicitly
// instead of being reported as a successful registration.
void TJupiBuilder::InitializeRTYT() {
    RtytClient = new NRTYT::TClientBase(TFsPath(Prefix).Parent());

    // Bind by const reference: works for both by-value and by-reference returns
    // and avoids copying the whole map in the latter case.
    const auto& allPrepTypes = NFusion::GetRTYTPrepTableTypes();
    for (const auto& lumpName : RtytConfig.GetLumpsToIndex().GetLump()) {
        // Single lookup instead of contains() followed by operator[].
        const auto typeIt = allPrepTypes.find(lumpName);
        if (typeIt == allPrepTypes.end()) {
            INFO_LOG << "Lump " << lumpName << " has no RTYT prep table type, it will not be indexed" << Endl;
            continue;
        }
        PrepTypes[lumpName] = typeIt->second;
        INFO_LOG << "Lump " << lumpName << " is registered to index" << Endl;
    }
}

// Indexes one parsed document under the given docId.
//
// Steps:
//   1. Takes the Jupi entity of the document and requires it to carry at least one
//      Mercury lump, among them a "calculated_attrs" lump that parses as
//      NJupiter::TCalculatedFactors and has non-empty IterateGroupAttrs2.
//   2. Serializes the whole lumps message and hands it to the prep writer (under Mutex).
//   3. If RTYT is enabled, every lump whose name has a registered message type is parsed
//      into that type, stamped with docId in its "DocId" field and appended as a row
//      to the table CypressPrefix + "/" + <lump name> (under RtytMutex).
//
// The threadId argument is unused.
// Validation failures are reported via Y_ENSURE / yexception; a missing entity or a failed
// serialization is handled by CHECK_WITH_LOG.
void TJupiBuilder::Index(int /*threadId*/, const TParsedDocument& document, const ui32 docId) {
    const TJupiParsedEntity* entity = document.GetComponentEntity<TJupiParsedEntity>(NRTYServer::JupiComponentName);
    CHECK_WITH_LOG(entity);

    const NJupiter::TMercuryLumps* data = entity->GetData();
    Y_ENSURE(data->LumpsSize() > 0, "the document has no Mercury lumps, docId = " << docId << ", url = " << document.GetUrl());

    // Only the first calculated_attrs lump is inspected; it must be valid.
    bool ok = false;
    for (const auto& lump : data->GetLumps()) {
        if (lump.GetName() == "calculated_attrs"sv) {
            NJupiter::TCalculatedFactors cf;
            Y_ENSURE(cf.ParseFromString(lump.GetData()), "cannot parse calculated_attrs, docId = " << docId << ", url = " << document.GetUrl());
            Y_ENSURE(!cf.GetIterateGroupAttrs2().empty(), "calculated_attrs.GetIterateGroupAttrs2() is empty, docId = " << docId << ", url = " << document.GetUrl());

            // An empty Bert field is not an error, it is only reported as a signal.
            if (cf.GetBert().empty()) {
                TSaasRTYServerSignals::DoUnistatNoBertEmbedding();
            }

            ok = true;
            break;
        }
    }
    Y_ENSURE(ok, "the document does not have valid calculated_attrs, docId = " << docId << ", url = " << document.GetUrl());

    TBuffer buffer;
    buffer.Resize(data->ByteSize());
    CHECK_WITH_LOG(data->SerializeToArray(buffer.Data(), buffer.Size()));

    with_lock(Mutex) {
        Writer->AddData(docId, TBlob::FromBufferSingleThreaded(buffer));
    }

    if (!RtytEnabled) {
        return;
    }

    with_lock(RtytMutex) {
        // Iterate by const reference: copying a lump would duplicate its payload
        // for every document while the lock is held.
        for (const auto& lump : data->GetLumps()) {
            const auto descriptorIt = PrepTypes.find(lump.GetName());
            if (descriptorIt == PrepTypes.end()) {
                DEBUG_LOG << "no message type in rtyt set for lump name =" << lump.GetName() << ", skipping" << Endl;
                continue;
            }
            const auto& descriptor = descriptorIt->second;

            // Table writers are created lazily, one per table path, and reused afterwards.
            const auto tablePath = CypressPrefix + "/" + lump.GetName();
            if (RtytWriters.find(tablePath) == RtytWriters.end()) {
                RtytWriters[tablePath] = RtytClient->CreateTableWriter(
                    NYT::TRichYPath(tablePath).Append(true),
                    *descriptor,
                    NYT::TTableWriterOptions());
            }
            // Reference, not a copy of the stored writer handle.
            const auto& writer = RtytWriters[tablePath];

            // Instantiate a mutable message of the registered type and fill it from the lump payload.
            THolder<::google::protobuf::Message> protoBuffer =
                THolder(::google::protobuf::MessageFactory::generated_factory()->GetPrototype(descriptor)->New());
            if (!protoBuffer->ParseFromString(lump.GetData())) {
                ythrow yexception() << "Failed to parse protobuf message";
            }
            protoBuffer->DiscardUnknownFields();

            // FindFieldByName returns nullptr when the type has no such field;
            // passing that to the reflection setter would dereference a null descriptor.
            const auto* docIdField = descriptor->FindFieldByName("DocId");
            Y_ENSURE(docIdField, "message type for lump " << lump.GetName() << " has no DocId field, docId = " << docId);
            const auto* reflection = protoBuffer->GetReflection();
            reflection->SetUInt64(protoBuffer.Get(), docIdField, docId);

            writer->AddRow<::google::protobuf::Message>(*protoBuffer);
        }
    }
}

// Writes the file <path>/indexfrq: a sequence of ui16 marks, one per document id.
//   map           - docid map; must not be null. Entries equal to
//                   NRtDoc::TDocIdMap::DeletedDocument() are skipped.
//   deletedDocids - ids that get the "removed" mark even though they are present in the map.
// A document gets mark 1 when it is present and (ui16)-1 when it is removed or missing.
// The non-deleted ids in the map are expected to come in increasing order: gaps before an id
// are filled with the "removed" mark, and an id smaller than the current position fails Y_ENSURE.
// After the last id the file is padded with "removed" marks up to the size of the map.
void TJupiBuilder::WriteIndexFrq(const TString& path, const NRtDoc::TDocIdMap* map, const THashSet<ui32>& deletedDocids) {
    Y_ENSURE(map);
    const TVector<ui32>& remap = *map->GetData();

    TFileOutput out(path + "/indexfrq");
    out.SetFinishPropagateMode(true);

    const ui16 aliveMark = 1;
    const ui16 goneMark = -1;
    const auto emit = [&out](const ui16& mark) {
        out.Write(&mark, sizeof(mark));
    };

    ui32 currentDocId = 0;
    for (ui32 docId : remap) {
        if (docId == NRtDoc::TDocIdMap::DeletedDocument()) {
            continue;
        }
        // Fill the gap between the previous written id and this one.
        while (currentDocId < docId) {
            emit(goneMark);
            ++currentDocId;
        }
        Y_ENSURE(currentDocId == docId);
        emit(deletedDocids.contains(docId) ? goneMark : aliveMark);
        ++currentDocId;
    }

    // Pad the tail so that the file covers as many ids as the map has entries.
    while (currentDocId < remap.size()) {
        emit(goneMark);
        ++currentDocId;
    }
    out.Finish();
}

// Discards the builder: the only action taken is closing the component manager if it is still opened.
void TJupiBuilder::DoDiscard() {
    if (!Manager->IsOpened()) {
        return;
    }
    Manager->Close();
}

// Finalizes the builder. Always returns true.
// When the close context carries a remap table:
//   - the table is written as the docid map of the prep writer;
//   - <DstDir>/indexfrq is written from it and the manager's deleted docids;
//   - if RTYT is enabled, the table CypressPrefix + "/docid_map" is created and filled
//     with one {"old": index, "new": remapped id} row per entry.
// Afterwards the prep writer is finished, RTYT writers are dropped and the client is synced
// (RTYT only), the file <DstDir>/jupi_prep_index is opened with WrOnly | OpenAlways,
// and the manager is closed if it is still opened.
bool TJupiBuilder::DoClose(const NRTYServer::TBuilderCloseContext& context) {
    if (context.RemapTable) {
        const auto& remapTable = *context.RemapTable;

        NRtDoc::TDocIdMap map;
        *map.MutableData() = remapTable;
        Writer->WriteDocIdMap(&map);
        WriteIndexFrq(context.DstDir.PathName(), &map, Manager->GetDeletedDocids());

        if (RtytEnabled) {
            const TString cypressMapPath = CypressPrefix + "/docid_map";

            NYT::TCreateOptions createOptions;
            createOptions.Attributes(NYT::TNode()("row_count", remapTable.size()));
            RtytClient->Create(cypressMapPath, NYT::ENodeType::NT_TABLE, createOptions);

            // The row writer lives only inside this scope.
            auto mapWriter = RtytClient->CreateTableWriter(NYT::TRichYPath(cypressMapPath).Append(true));
            for (size_t oldDocId = 0; oldDocId < remapTable.size(); ++oldDocId) {
                mapWriter->AddRow(NYT::TNode()("old", oldDocId)("new", remapTable.at(oldDocId)));
            }
        }
    }

    Writer->Finish();

    if (RtytEnabled) {
        RtytWriters.clear();
        RtytClient->Sync();
    }

    // The handle itself is not used further; presumably the open call is what matters here
    // (OpenAlways is expected to create the file when it is missing — confirm against util/system/file.h).
    TFile prepIndexFile(TFsPath(context.DstDir.PathName()) / "jupi_prep_index", WrOnly | OpenAlways);

    if (Manager->IsOpened()) {
        Manager->Close();
    }
    return true;
}

}
