#include <crypta/lookalike/services/index_builder/lib/segment_embeddings_data.h>
#include <crypta/lookalike/services/index_builder/lib/segment_embeddings_data_reader.h>
#include <crypta/lookalike/services/index_builder/proto/index_builder_config.pb.h>
#include <crypta/lookalike/lib/native/common.h>
#include <crypta/lookalike/lib/native/directory_finder.h>
#include <crypta/lookalike/lib/native/get_date.h>
#include <crypta/lookalike/lib/native/get_oldest_timed_table.h>
#include <crypta/lookalike/lib/native/hnsw_index_model.h>
#include <crypta/lookalike/proto/mode.pb.h>
#include <crypta/lookalike/proto/yt_node_names.pb.h>

#include <crypta/lib/native/log/loggers/std_logger.h>
#include <crypta/lib/native/proto_secrets/remove_secrets.h>
#include <crypta/lib/native/time/shifted_clock.h>
#include <crypta/lib/native/yaml/config/config.h>
#include <crypta/lib/native/yt/utils/helpers.h>

#include <library/cpp/hnsw/index_builder/dense_vector_index_builder.h>
#include <library/cpp/hnsw/index_builder/index_writer.h>

#include <mapreduce/yt/common/config.h>
#include <mapreduce/yt/interface/client.h>
#include <mapreduce/yt/util/ypath_join.h>

#include <util/generic/buffer.h>
#include <util/generic/vector.h>
#include <util/stream/buffer.h>

using namespace NCrypta;
using namespace NCrypta::NLookalike;
using namespace NCrypta::NLookalike::NIndexBuilder;

// Dense vector storage over embedding components; main() wraps the embeddings
// data in it and hands it to NHnsw::BuildDenseVectorIndex.
using TStorage = NHnsw::TDenseVectorStorage<TEmbeddingComponent>;

namespace {
    // Serializes `indexData` into the YT file node at `path`.
    //
    // When nothing exists at `path` yet, a file node is created first with the
    // Recursive option set. The writer is finished explicitly before returning.
    void WriteIndex(NYT::IClientBasePtr client, const TString& path, const NHnsw::THnswIndexData& indexData) {
        const bool nodeExists = client->Exists(path);
        if (!nodeExists) {
            const auto createOptions = NYT::TCreateOptions().Recursive(true);
            client->Create(path, NYT::ENodeType::NT_FILE, createOptions);
        }

        auto output = client->CreateFileWriter(path);
        NHnsw::WriteIndex(indexData, *output);
        output->Finish();
    }

    // Writes `size` raw bytes starting at `data` into the YT file node at `path`.
    //
    // When nothing exists at `path` yet, a file node is created first with the
    // Recursive option set. The writer is finished explicitly before returning.
    void WriteBlob(NYT::IClientBasePtr client, const TString& path, const void* data, size_t size) {
        const bool nodeExists = client->Exists(path);
        if (!nodeExists) {
            const auto createOptions = NYT::TCreateOptions().Recursive(true);
            client->Create(path, NYT::ENodeType::NT_FILE, createOptions);
        }

        auto output = client->CreateFileWriter(path);
        output->Write(data, size);
        output->Finish();
    }

    // Resolves the version directory this run operates on.
    //
    // Delegates to NDirectoryFinder::FindLastWithChildren over
    // config.GetVersionsDir(); the only thing that depends on the mode is which
    // child node the directory is required to contain:
    //   - ModeValue::NEW -> the fresh segment embeddings directory,
    //   - any other mode -> the segment embeddings table.
    TString GetWorkingDir(NYT::IClientBasePtr client, const TIndexBuilderConfig& config) {
        const TYtNodeNames nodeNames;
        const bool isNewMode = (config.GetMode() == ModeValue::NEW);
        const auto& requiredChild = isNewMode
            ? nodeNames.GetFreshSegmentEmbeddingsDir()
            : nodeNames.GetSegmentEmbeddingsTable();

        return NDirectoryFinder::FindLastWithChildren(client, config.GetVersionsDir(), {requiredChild});
    }
}

int main(int argc, const char** argv) {
    auto log = NLog::NStdLogger::RegisterLog("main", "stdout", "info");
    log->info("================ Start ================");

    try {
        const auto& config = ParseYamlConfig<TIndexBuilderConfig>(argc, argv);

        log->info("Config:\n{}", NProtoSecrets::GetCopyWithoutSecrets(config).DebugString());

        TShiftedClock::FreezeTimestampFromEnv();

        const auto dimension = config.GetDimension();

        NYT::TConfig::Get()->Pool = config.GetYt().GetPool();
        auto client = NYT::CreateClient(config.GetYt().GetProxy());
        auto tx = client->StartTransaction();
        const auto& mode = config.GetMode();
        const auto date = GetDate(TShiftedClock::Now());

        TString workingDir = GetWorkingDir(tx, config);

        log->info("Found last version dir: {}", workingDir);
        log->info("Reading data...");

        NYT::TYPath embeddingsPath;
        NYT::TYPath indexFile;
        NYT::TYPath dataFile;
        NYT::TYPath labelsFile;


        if (mode == ModeValue::NEW) {
            auto embeddingsDir = NYT::JoinYPaths(workingDir, TYtNodeNames().GetFreshSegmentEmbeddingsDir());
            TMaybe<TString> maybeEmbeddingsPath = GetOldestTimedTable(tx, embeddingsDir);

            auto freshFilesDir = NYT::JoinYPaths(workingDir, TYtNodeNames().GetFreshFilesDir());
            if(!tx->Exists(freshFilesDir)) {
                tx->Create(freshFilesDir, NYT::NT_MAP, NYT::TCreateOptions().Recursive(true));
            }

            if (!maybeEmbeddingsPath) {
                tx->Commit();
                log->info("Fresh embeddings not found");
                log->info("================ Finish ================");
                return 0;
            }

            embeddingsPath = NYT::JoinYPaths(workingDir, TYtNodeNames().GetFreshSegmentEmbeddingsDir(), *maybeEmbeddingsPath);
            indexFile = NYT::JoinYPaths(freshFilesDir, *maybeEmbeddingsPath, TYtNodeNames().GetIndexFile());
            dataFile = NYT::JoinYPaths(freshFilesDir, *maybeEmbeddingsPath, TYtNodeNames().GetDataFile());
            labelsFile = NYT::JoinYPaths(freshFilesDir, *maybeEmbeddingsPath, TYtNodeNames().GetLabelsFile());
        } else {
            embeddingsPath = NYT::JoinYPaths(workingDir, TYtNodeNames().GetSegmentEmbeddingsTable());
            indexFile = NYT::JoinYPaths(workingDir, TYtNodeNames().GetIndexFile());
            dataFile = NYT::JoinYPaths(workingDir, TYtNodeNames().GetDataFile());
            labelsFile = NYT::JoinYPaths(workingDir, TYtNodeNames().GetLabelsFile());

            if (IsProcessed(tx, embeddingsPath)) {
                tx->Commit();
                log->info("Already processed for date: {}", date);
                log->info("================ Finish ================");
                return 0;
            }
        }

        const auto& segmentEmbeddingsData = NSegmentEmbeddingsDataReader::Read(tx, NYT::JoinYPaths(workingDir, embeddingsPath), dimension);
        const auto& data = segmentEmbeddingsData.GetData();
        const auto& labels = segmentEmbeddingsData.GetLabels();

        log->info("Building index...");

        TStorage storage(data, dimension);
        const auto& indexData = NHnsw::BuildDenseVectorIndex<TEmbeddingComponent, THnswIndexModel::TDistance>(NHnsw::THnswBuildOptions(), storage, dimension);

        log->info("Writing index file into: {}", indexFile);
        WriteIndex(tx, indexFile, indexData);

        log->info("Writing data file into: {}",dataFile);
        WriteBlob(tx, dataFile, data.Data(), data.Size());

        log->info("Writing labels files into: {}",labelsFile);
        WriteBlob(tx, labelsFile, labels.data(), labels.size() * sizeof(THnswIndexModel::TLabel));

        if (mode == ModeValue::NEW) {
            log->info("Removing processed segments embeddings table: {}", embeddingsPath);
            tx->Remove(embeddingsPath);
        } else {
            SetAttribute(tx, embeddingsPath, LAST_PROCESSED_ATTR_NAME, date);
        }

        tx->Commit();

        log->info("================ Finish ================");

        return 0;
    } catch (const std::exception& e) {
        log->error("Failed with error: {}", e.what());
    }

    log->info("================ Error ================");

    return 1;
}
