#include "helpers.h"

#include <crypta/lib/native/yt/utils/helpers.h>
#include <crypta/siberia/bin/make_user_data_encoded/lib/proto_types/table_data_types.pb.h>
#include <crypta/lib/proto/user_data/token_dict_item.pb.h>


namespace NCrypta::NSiberia::NUserDataWithDicts {
    using namespace NLab;
    using namespace NLab::NEncodedUserData;

    void MergeDictWithNewTokens(NCrypta::NLog::TLogPtr log,
                                NYT::ITransactionPtr tx,
                                const TTokenToWeightedIdDict& priorDict,
                                const NYT::TYPath& newTokensTable,
                                const TString& dstDictTable)
    {
        TTokenToWeightedIdDict newVersion = priorDict;

        auto reader = tx->CreateTableReader<TWeightedWordCount>(newTokensTable);
        for (TDictId id = newVersion.size(); reader->IsValid(); reader->Next()) {
            const auto& row = reader->GetRow();
            const auto& token = row.GetToken();

            const auto& [iter, actuallyInserted] = newVersion.emplace(token, TWeightedId{.Id = id, .Weight = static_cast<float>(row.GetWeight())});
            if (actuallyInserted) {
                ++id;
            } else {
                iter->second.Weight = row.GetWeight();
            }
        }

        log->info("Creating table {}", dstDictTable);
        tx->Create(dstDictTable, NYT::NT_TABLE, NYT::TCreateOptions().Recursive(true).Force(true));

        log->info("Writing dict contents to {}", dstDictTable);
        auto writer = tx->CreateTableWriter<TTokenDictItem>(dstDictTable);
        TTokenDictItem row;
        for (const auto& [token, tokenEntry]: newVersion) {
            row.SetToken(token);
            row.SetId(tokenEntry.Id);
            row.SetWeight(tokenEntry.Weight);
            writer->AddRow(row);
        }
        log->info("Finishing writing");
        writer->Finish();

        const auto& idField = YT_FIELD(TTokenDictItem, Id);
        log->info("Sorting {} by {}", dstDictTable, idField);
        tx->Sort(dstDictTable, dstDictTable, {idField});
    }
}
