#include "queue_reader.h"
#include "data.h"

#include <travel/hotels/lib/cpp/util/compress.h>
#include <travel/hotels/lib/cpp/scheduler/scheduler.h>

#include <yt/yt/client/api/client.h>
#include <yt/yt/client/api/rowset.h>
#include <yt/yt/client/table_client/unversioned_row.h>

#include <library/cpp/logger/global/global.h>
#include <util/generic/maybe.h>
#include <util/string/cast.h>

#define LOG_PFX "[" << Name_ << "] "

using namespace NTravelProto;

namespace NTravel {

// Verifies that a value's actual type tag matches the expected one.
// Throws yexception carrying both tags (printed as integers) when they differ.
template <class T1, class T2>
void CheckType(T1 actual, T2 expected) {
    const bool mismatch = (expected != actual);
    if (!mismatch) {
        return;
    }
    throw yexception() << "Invalid field type: expected " << static_cast<int>(expected)
                       << ", got " << static_cast<int>(actual);
}

// Binds the counter set to its owning reader; QueryCounters() reads live values
// from the owner through this reference.
TYtQueueReader::TCounters::TCounters(TYtQueueReader& owner)
    : Owner(owner)
{
}

// Publishes the reader-wide counters into the monitoring table `ct`.
// Two values are not maintained incrementally and are refreshed from the owner
// right before they are published: LongNoReadError and NProcessQueueSize.
void TYtQueueReader::TCounters::QueryCounters(NMonitor::TCounterTable* ct) const {
    ct->insert(MAKE_COUNTER_PAIR(IsReady));
    ct->insert(MAKE_COUNTER_PAIR(NAliveClusters));
    ct->insert(MAKE_COUNTER_PAIR(NRecordsGood));
    ct->insert(MAKE_COUNTER_PAIR(NRecordsBad));
    ct->insert(MAKE_COUNTER_PAIR(NRecordsUnknown));
    ct->insert(MAKE_COUNTER_PAIR(NRecordsDeduplicated));
    ct->insert(MAKE_COUNTER_PAIR(NRecordsNotConsumed));
    ct->insert(MAKE_COUNTER_PAIR(NInvalidTrims));
    ct->insert(MAKE_COUNTER_PAIR(NReadError));
    // Computed on demand: 1 when the owner reports no successful read for too long.
    LongNoReadError = Owner.IsLongNoReadError() ? 1 : 0;
    ct->insert(MAKE_COUNTER_PAIR(LongNoReadError));
    ct->insert(MAKE_COUNTER_PAIR(MsgUnpackNs));
    // Computed on demand: current size reported by the owner's processing pool.
    NProcessQueueSize = Owner.ProcessThreads_.Size();
    ct->insert(MAKE_COUNTER_PAIR(NProcessQueueSize));
}

// Publishes the per-cluster counters (last processed timestamp and actualization lag)
// into the monitoring table `ct`.
void TYtQueueReader::TPerClusterCounters::QueryCounters(NMonitor::TCounterTable* ct) const {
    ct->insert(MAKE_COUNTER_PAIR(LastProcessedTimestamp));
    ct->insert(MAKE_COUNTER_PAIR(ActualizationLagSec));
}

// Constructs a reader from its config, a name (used as the log prefix and as the
// counter source name) and options. Nothing is started here: clients, threads and
// the cache are brought up by Start().
TYtQueueReader::TYtQueueReader(const NTravelProto::NAppConfig::TConfigYtQueueReader& config, const TString& name, const TYtQueueReaderOptions& opts)
    : Config_(config)
    , ReadLimit_(config.GetReadRowLimit())
    , Name_(name)
    , Options_(opts)
    , ClientCreator_(config.GetUser(), config.GetYtTokenPath())
    , Counters_(*this)
    , PerClusterCounters_({"YtCluster"})
    , IsReady_(false)
    , InitialReadDoneCount_(0)
    // NOTE(review): reads Config_ (the member), so Config_ must be declared before
    // DeduplicationService_ in the class — confirm against the header.
    , DeduplicationService_(TDuration::Seconds(Config_.GetDeduplicationExpirationPeriodSec()), opts.EnableDeduplication)
    , Cache_(Config_)
{
}

// Stops the reader on destruction; Stop() returns immediately if it was already called.
TYtQueueReader::~TYtQueueReader() {
    Stop();
}

// Registers all counter groups of this reader in `source`. Group names are derived
// from the reader name: "<Name>", "<Name>PerCluster", "<Name>Cache", "<Name>Deduplication".
void TYtQueueReader::RegisterCounters(NMonitor::TCounterSource& source) {
    source.RegisterSource(&Counters_, Name_);
    source.RegisterSource(&PerClusterCounters_, Name_ + "PerCluster");
    Cache_.RegisterCounters(source, Name_ + "Cache");
    DeduplicationService_.RegisterCounters(source, Name_ + "Deduplication");
}

// Returns true once SetIsReady() has been called (initial read completed).
bool TYtQueueReader::IsReady() const {
    return IsReady_;
}

bool TYtQueueReader::IsLongNoReadError() const {
    bool isReady = IsReady();
    if (!isReady) {
        // Пока не готов - не сообщаем ошибку
        return false;
    }
    TDuration d = TDuration::Seconds(Config_.GetNoReadErrorTimeoutSec());
    with_lock (LastSuccessfulReadLock_) {
        return LastSuccessfulRead_.Get() > d;
    }
}

void TYtQueueReader::StartThreadFunc() {
    RecordMaxAge_ = DetermineRecordMaxAge();
    if (StopFlag_) {
        return;
    }
    DEBUG_LOG << LOG_PFX << "RecordMaxAge for table " << Config_.GetTablePath() << ": " << RecordMaxAge_ << Endl;
    InitialCacheRead();
    if (StopFlag_) {
        return;
    }
    NTravel::TScheduler::Instance().EnqueuePeriodical(
        TDuration::Seconds(Config_.GetCacheCleanupPeriodSec()),
        [this]() {
           Cache_.DeleteOldRows(TInstant::Now() - TDuration::Seconds(Config_.GetAdditionalExpireTimeSec()), RecordMaxAge_, Options_.CheckExpireTimestamp);
        });
    for (auto it = Clusters_.begin(); it != Clusters_.end(); ++it) {
        it->second.Thread = SystemThreadFactory()->Run([this, it]() {
            ThreadFunc(it->second);
        });
    }
    if (Clusters_.empty()) {
        SetIsReady();
    }
}

// Creates a YT client for every configured cluster, computes how many clusters must
// finish their initial read before the reader is considered ready, then starts the
// cache, the processing pool and the startup thread.
// A cluster whose client could not be created is still registered (without a client).
void TYtQueueReader::Start() {
    for (const TString& clusterName: Config_.GetClusterName()) {
        TCluster cluster;
        cluster.Name = clusterName;
        cluster.PrintName = "[" + clusterName + "]";
        try {
            INFO_LOG << LOG_PFX << "Creating client for cluster " << cluster.PrintName << Endl;
            cluster.YtClient = ClientCreator_.CreateClient(cluster.Name);
        } catch (...) {
            ERROR_LOG << LOG_PFX << "Failed to create client for cluster " << cluster.PrintName << ", Error: " << CurrentExceptionMessage() << Endl;
        }
        Clusters_.insert(std::make_pair(clusterName, cluster));
    }

    InitialReadClusterCount_ = Config_.GetInitialReadClustersRatio() * Clusters_.size();
    if (InitialReadClusterCount_ == 0 && Clusters_.size() > 0) {
        // Compensate for the ratio rounding down to zero.
        InitialReadClusterCount_ = 1;
    }
    INFO_LOG << LOG_PFX << "Initial read is expected to happen from " << InitialReadClusterCount_ << " clusters" << Endl;

    Cache_.Start();
    ProcessThreads_.Start(Config_.GetNumPoolThreads());
    StartThread_ = SystemThreadFactory()->Run([this]() {
        StartThreadFunc();
    });
}

// Stops the reader: raises the stop flag, joins the startup thread, wakes up and joins
// every cluster thread, then stops the processing pool and the cache.
// A second call is a no-op.
void TYtQueueReader::Stop() {
    if (StopFlag_) {
        return;
    }
    StopFlag_.Set();

    // The startup thread is joined first: it is the one that creates cluster threads.
    if (StartThread_) {
        StartThread_->Join();
        StartThread_.Reset();
    }

    // Wake everybody up before joining anybody, so the threads shut down in parallel.
    for (auto& entry : Clusters_) {
        entry.second.WakeUp.Signal();
    }
    for (auto& entry : Clusters_) {
        auto& thread = entry.second.Thread;
        if (thread) {
            thread->Join();
            thread.Reset();
        }
    }

    ProcessThreads_.Stop();
    Cache_.Stop();
}

// Registers the message type of `message` with an empty handler: such messages are
// recognized as known but are not passed to any consumer.
void TYtQueueReader::Ignore(const NProtoBuf::Message& message) {
    Subscribe(message, nullptr);
}

// Registers `handler` for the message type of `message` (keyed by the protobuf full name).
// A previously registered handler for the same type is replaced.
void TYtQueueReader::Subscribe(const NProtoBuf::Message& message, TRecordHandler handler) {
    const auto& messageType = message.GetDescriptor()->full_name();
    Handlers_[messageType] = handler;
}

// Registers `converter` for the message type of `message` (keyed by the protobuf full name).
// A previously registered converter for the same type is replaced.
void TYtQueueReader::RegisterConverter(const NProtoBuf::Message& message, TRecordConverter converter) {
    const auto& messageType = message.GetDescriptor()->full_name();
    Converters_[messageType] = converter;
}

// Sets the callback that SetIsReady() invokes once, when the reader becomes ready.
void TYtQueueReader::SetReadinessNotifier(const TReadinessNotifier& notifier) {
    ReadinessNotifier_ = notifier;
}

// Marks the reader as ready. Only the first call has any effect: it updates the
// IsReady counter, logs the event and fires the readiness notifier, if one is set.
void TYtQueueReader::SetIsReady() {
    if (!IsReady_.TrySet()) {
        return; // somebody has already done it
    }
    Counters_.IsReady = 1;
    INFO_LOG << LOG_PFX << "Initial read is completed, bus is ready" << Endl;
    if (ReadinessNotifier_) {
        ReadinessNotifier_();
    }
}

// Body of a per-cluster polling thread.
// Phase 1: retry the initial read once a second until it succeeds; the reader becomes
//          ready when enough clusters have completed it.
// Phase 2: poll the table until a stop is requested, backing off after repeated errors.
void TYtQueueReader::ThreadFunc(TCluster& cluster) {
    int tabletIndex = 0; // TODO(luxaeterna): iterate tabletIndex range

    MaybeInitCurrentRowFromYt(cluster, tabletIndex);

    const auto initialReadStarted = Now();
    while (!StopFlag_) {
        DEBUG_LOG << LOG_PFX << "Trying initial read from " << cluster.PrintName << Endl;
        if (!ReadTable(cluster, tabletIndex)) {
            cluster.WakeUp.WaitT(TDuration::Seconds(1));
            continue;
        }
        INFO_LOG << LOG_PFX << "Initial read done from " << cluster.PrintName << " (took " << Now() - initialReadStarted << ")" << Endl;
        if (AtomicIncrement(InitialReadDoneCount_) >= static_cast<TAtomicBase>(InitialReadClusterCount_)) {
            SetIsReady();
        }
        break;
    }

    size_t consecutiveErrors = 0;
    while (!StopFlag_) {
        const TInstant iterationStarted = Now();
        if (!ReadTable(cluster, tabletIndex)) {
            // The first few failures are retried almost immediately, later ones once a second.
            ++consecutiveErrors;
            cluster.WakeUp.WaitT(consecutiveErrors < 10 ? TDuration::MilliSeconds(1) : TDuration::Seconds(1));
            continue;
        }
        consecutiveErrors = 0;
        // The poll period is counted from the start of the iteration, not from its end.
        cluster.WakeUp.WaitD(iterationStarted + TDuration::MilliSeconds(Config_.GetPollPeriodMSec()));
    }
}

// Records the liveness of `cluster`. When the state actually changes, the
// NAliveClusters counter is adjusted and the transition is logged; otherwise only a
// debug message is written.
void TYtQueueReader::ChangeClusterAlive(TCluster& cluster, bool isAlive) {
    const bool unchanged = (cluster.IsAlive == isAlive);
    if (unchanged) {
        if (isAlive) {
            DEBUG_LOG << LOG_PFX << "Cluster " << cluster.PrintName << " is still alive" << Endl;
        } else {
            DEBUG_LOG << LOG_PFX << "Cluster " << cluster.PrintName << " is still DEAD" << Endl;
        }
        return;
    }

    cluster.IsAlive = isAlive;
    if (!isAlive) {
        Counters_.NAliveClusters.Dec();
        ERROR_LOG << LOG_PFX << "Cluster " << cluster.PrintName << " is now DEAD" << Endl;
        return;
    }
    Counters_.NAliveClusters.Inc();
    INFO_LOG << LOG_PFX << "Cluster " << cluster.PrintName << " is now alive!" << Endl;
}

// Replays the local cache before any cluster is polled.
// Messages newer than the `tooOld` border are deduplicated and handed to the subscribed
// handlers through the processing pool; afterwards the per-cluster read positions
// (CurrentRow) are restored from the cached indices.
// When InitializationTailSizeSec is configured, the cache is used only if it reaches
// back far enough; otherwise the function returns without replaying anything.
void TYtQueueReader::InitialCacheRead() {
    TProfileTimer started;
    INFO_LOG << LOG_PFX << "Initial read from cache started" << Endl;
    auto tooOld = TInstant::Now() - RecordMaxAge_ - TDuration::Seconds(Config_.GetAdditionalExpireTimeSec());
    auto initializationTailSizeSec = Config_.GetInitializationTailSizeSec();
    if (initializationTailSizeSec) {
        auto initializationTailSize = TDuration::Seconds(initializationTailSizeSec);
        tooOld = TInstant::Now() - Min(RecordMaxAge_, initializationTailSize);

        INFO_LOG << LOG_PFX << "Using initialization tail size " << initializationTailSize << " and reading cache from " << tooOld << Endl;

        // Probe pass: the callback always returns false, so only the first message is inspected.
        // If it is more than 5 seconds newer than the border, the cache is considered too young.
        auto useCache = true;
        Cache_.ReadRows(tooOld, [this, &tooOld, &useCache](const TYtQueueMessagePacked& message) {
            if (tooOld + TDuration::Seconds(5) < message.Timestamp) {
                useCache = false;
                WARNING_LOG << LOG_PFX << "Initial read from cache is impossible because cache is younger than InitializationTailSizeSec (first cache timestamp: " << message.Timestamp << ", tooOld border: " << tooOld << ")" << Endl;
            }
            return false;
        });
        if (!useCache) {
            return;
        }
    }

    size_t numberOfRows = 0;
    // See ReadTable() comments for a guidance on the concurrency implementation
    // numTasks starts at 1: this thread holds one "task" until all real tasks are queued,
    // so the counter cannot reach zero prematurely.
    std::atomic<size_t> numTasks = 1;
    TAutoEvent syncBarrier;
    Cache_.ReadRows(tooOld, [this, &numberOfRows, syncBarrier, &numTasks]
                    (const TYtQueueMessagePacked& message) {
        if (StopFlag_) {
            INFO_LOG << LOG_PFX << "Initial read from cache interrupted by STOP condition" << Endl;
            return false;
        }

        if (!DeduplicationService_.CheckIfKeyNewAndRememberIt(message.MessageId, message.Timestamp)) {
            WARNING_LOG << LOG_PFX << "Found duplicate in cache. MessageId: " << message.MessageId << ", Timestamp: " << message.Timestamp << Endl;
            return true;
        }

        TRecordHandler* handlerPtr = Handlers_.FindPtr(message.MessageType);
        if (!handlerPtr) {
            WARNING_LOG << LOG_PFX << "Unknown message type '" << message.MessageType << "'" << Endl;
            return true;
        }

        // An empty handler means the type is ignored: the row is counted but not processed.
        if (*handlerPtr) {
            ++numTasks;
            ProcessThreads_.SafeAddFunc([this, numberOfRows, message, handlerPtr, syncBarrier, &numTasks]() mutable {
                try {
                    TYtQueueMessage messageUnpacked;
                    UnpackMessage(message, messageUnpacked);
                    (*handlerPtr)(messageUnpacked);
                } catch (...) {
                    ERROR_LOG << LOG_PFX << "Failed to read record from cache. Cluster " << message.Origin.ClusterName << ", row "
                              << numberOfRows << ": " << CurrentExceptionMessage() << Endl;
                    Counters_.NRecordsBad.Inc();
                }
                // The task that brings the counter to zero releases the waiting thread.
                if (--numTasks == 0) {
                    syncBarrier.Signal();
                }
            });
        }

        ++numberOfRows;
        return true;
    });
    // Drop this thread's own "task"; wait only if some queued tasks are still running.
    if (--numTasks > 0) {
        syncBarrier.WaitI();
    }
    INFO_LOG << LOG_PFX << numberOfRows << " rows read from cache " << Endl;
    numberOfRows = 0;
    // Restore read positions: continue right after the last row index stored for each known cluster.
    Cache_.ReadIndicies([this, &numberOfRows](const TYtQueueMessageOrigin& origin) {
        auto it = Clusters_.find(origin.ClusterName);
        if (it != Clusters_.end()) {
            it->second.CurrentRow = origin.RowIndex + 1;
        }
        Y_UNUSED(origin.TabletIndex); // TODO(luxaeterna): HashMap<i64, i64> cluster.CurrentRows[tabletIndex]
        ++numberOfRows;// TODO (alexcrush) make it result of Cache_.*
    });
    INFO_LOG << LOG_PFX << numberOfRows << " indices read from cache" << Endl;
    INFO_LOG << LOG_PFX << "Initial read from cache finished in " << started.Get() << Endl;
}

// Reads everything that is currently available in the tablet, batch by batch
// (at most ReadLimit_ rows per batch), advancing cluster.CurrentRow as it goes.
// Returns true when the tablet has been read up to its total row count; returns false
// if a stop was requested or any error occurred (the cluster is then marked as dead).
bool TYtQueueReader::ReadTable(TCluster& cluster, int tabletIndex) {
    try {
        const auto tabletInfo = GetTabletInfo(cluster, tabletIndex);
        ChangeClusterAlive(cluster, true);

        // Rows below the trimmed border no longer exist; skip them.
        if (cluster.CurrentRow < tabletInfo.TrimmedRowCount) {
            cluster.CurrentRow = tabletInfo.TrimmedRowCount;
        }

        while (cluster.CurrentRow < tabletInfo.TotalRowCount) {
            DEBUG_LOG << LOG_PFX << "There are " << (tabletInfo.TotalRowCount - cluster.CurrentRow) << " rows to read from cluster " << cluster.PrintName << Endl;
            if (StopFlag_) {
                return false;
            }
            const i64 lastRowRead = ReadTable(cluster, tabletIndex, cluster.CurrentRow, cluster.CurrentRow + ReadLimit_);
            if (lastRowRead < 0) {
                // A tablet can be trimmed during the reading, so nothing would be read.
                // In this case we advance just for ReadLimit_ rows
                cluster.CurrentRow += ReadLimit_;
                if (cluster.CurrentRow > tabletInfo.TotalRowCount) {
                    cluster.CurrentRow = tabletInfo.TotalRowCount;
                }
                continue;
            }
            cluster.CurrentRow = lastRowRead + 1;
        }
        return true;
    } catch (...) {
        ERROR_LOG << LOG_PFX << "Error while read at cluster " << cluster.PrintName << ": " << CurrentExceptionMessage() << Endl;
        ChangeClusterAlive(cluster, false);
    }
    return false;
}

// Reads the table's @max_data_ttl attribute (milliseconds) from the first cluster that
// answers. Clusters are tried round-robin; after each full round the thread waits for
// one second. Returns a default-constructed TDuration if there are no clusters or a
// stop was requested before any cluster answered.
//
// Clusters whose YT client could not be created in Start() have a null YtClient; they
// are skipped (with an error logged) instead of being dereferenced.
TDuration TYtQueueReader::DetermineRecordMaxAge() {
    const auto shortTimeout = TDuration::Seconds(1);
    const auto attributePath = Config_.GetTablePath() + "/@max_data_ttl";
    auto it = Clusters_.begin();
    if (it == Clusters_.end()) {
        return {};
    }
    while (!StopFlag_) {
        auto& cluster = it->second;
        if (cluster.YtClient) {
            try {
                return TDuration::MilliSeconds(NYT::NYTree::ConvertTo<i64>(cluster.YtClient->GetNode(attributePath).WithTimeout(shortTimeout).Get().ValueOrThrow()));
            } catch (...) {
                ERROR_LOG << LOG_PFX << "Error while read at cluster " << cluster.PrintName << ": " << CurrentExceptionMessage() << Endl;
            }
        } else {
            ERROR_LOG << LOG_PFX << "YT client for cluster " << cluster.PrintName << " is dead" << Endl;
        }
        if (++it == Clusters_.end()) {
            // Every cluster has failed in this round: pause before starting over.
            it = Clusters_.begin();
            cluster.WakeUp.WaitT(shortTimeout);
        }
    }
    return {};
}

// Fetches the tablet info (trimmed and total row counts) for one tablet of the queue table.
// Throws yexception when the cluster has no YT client (its creation failed in Start()),
// when the request fails or times out (GetRowsOpTimeoutMSec), or when the reply does
// not contain exactly one entry.
NYT::NApi::TTabletInfo TYtQueueReader::GetTabletInfo(const TCluster& cluster, int tabletIndex) const {
    // Without this check a cluster whose client was never created would be dereferenced
    // here; throwing lets the callers' existing error handling mark the cluster as dead.
    if (!cluster.YtClient) {
        throw yexception() << "YT client for cluster " << cluster.PrintName << " is dead";
    }
    const TDuration timeout = TDuration::MilliSeconds(Config_.GetGetRowsOpTimeoutMSec());
    auto result = cluster.YtClient->GetTabletInfos(Config_.GetTablePath(), {tabletIndex}).WithTimeout(timeout).Get();
    if (!result.IsOK()) {
        throw yexception() << "Cannot do GetTabletInfos: " << ToString(result);
    }
    if (result.Value().size() != 1) {
        throw yexception() << "Invalid result from GetTabletInfos: count " << result.Value().size() << " != 1";
    }
    return result.Value()[0];
}

// Binary-searches the tablet for a row index near the first row whose timestamp is not
// earlier than `timestamp`. The search runs between TrimmedRowCount and TotalRowCount;
// a row that cannot be fetched is treated as having a zero timestamp.
// Returns the upper search bound; exceptions from the YT requests propagate to the caller.
i64 TYtQueueReader::FindApproximateLowerBoundByTimestamp(TCluster& cluster, int tabletIndex, TInstant timestamp) const {
    DEBUG_LOG << LOG_PFX << "Going to find lower bound in " << cluster.PrintName << " for " << timestamp << Endl;

    TProfileTimer searchTimer;
    const auto tabletInfo = GetTabletInfo(cluster, tabletIndex);
    auto low = tabletInfo.TrimmedRowCount;  // inclusive
    auto high = tabletInfo.TotalRowCount;   // exclusive
    DEBUG_LOG << LOG_PFX << "Assuming l = " << low << " (TrimmedRowCount) and r = " << high << " (TotalRowCount) in " << cluster.PrintName << Endl;

    while (low + 1 < high) {
        const auto probe = (low + high) / 2;
        const TInstant probeTimestamp = GetRecordTimestamp(cluster, tabletIndex, probe).GetOrElse(TInstant::Zero());
        if (probeTimestamp < timestamp) {
            low = probe;
        } else {
            high = probe;
        }
    }
    DEBUG_LOG << LOG_PFX << "Search stopped at l = " << low << " and r = " << high << " in " << cluster.PrintName << " and took " << searchTimer.Get() << Endl;
    DEBUG_LOG << LOG_PFX << "Assuming " << high << " (with timestamp " << GetRecordTimestamp(cluster, tabletIndex, high).GetOrElse(TInstant::Seconds(0)) << ") is the desired lower bound for " << timestamp << Endl;
    return high;
}

// Selects the Timestamp column of a single row, addressed by tablet index and row index.
// Returns an empty TMaybe when the row is not found.
// Throws yexception if the select fails (NReadError is incremented) or if the column
// is not of type Uint64.
TMaybe<TInstant> TYtQueueReader::GetRecordTimestamp(TCluster& cluster, int tabletIndex, i64 rowIndex) const {
    TString query = "Timestamp";
    query += " FROM [" + Config_.GetTablePath() + "]";
    query += " WHERE [$tablet_index] = " + ToString(tabletIndex) + " AND [$row_index] = " + ToString(rowIndex);
    DEBUG_LOG << LOG_PFX << "Binary search: determine row index at " << cluster.PrintName << ": " << query << Endl;

    const TDuration readTimeout = TDuration::MilliSeconds(Config_.GetReadTimeoutMSec());
    NYT::TErrorOr<NYT::NApi::TSelectRowsResult> selected = cluster.YtClient->SelectRows(query).WithTimeout(readTimeout).Get();
    if (!selected.IsOK()) {
        Counters_.NReadError.Inc();
        throw yexception() << "Failed to read from " << cluster.PrintName << ": " << ToString(selected);
    }

    const auto& rows = selected.Value().Rowset->GetRows();
    if (rows.Empty()) {
        return {};
    }

    // Exactly one column was requested, so the timestamp is value #0 of the first row.
    const auto& timestampValue = rows[0][0];
    CheckType(timestampValue.Type, NYT::NTableClient::EValueType::Uint64);
    return TInstant::MilliSeconds(timestampValue.Data.Uint64);
}


// Reads rows [rowFrom, rowTo) of one tablet from `cluster`, hands every acceptable row to
// the processing pool (unpack -> optional converter -> handler), waits for all of them and
// writes the consumed messages to the cache.
//
// Returns the maximum row index among the rows that were scheduled for processing, or -1
// if none were scheduled (nothing selected, or everything was deduplicated/unknown).
// Throws yexception if the select fails or a column has an unexpected type.
//
// Changes compared to the previous revision:
//  * the row timestamp is parsed before the deduplication check, so the check sees the
//    timestamp of the current row rather than a stale one left over from a previous row;
//  * if an exception interrupts scheduling, already queued tasks are awaited before the
//    exception propagates, because they hold references to locals of this frame;
//  * consumed messages are really moved into the cache batch (the loop variable used to
//    be const, which silently turned the move into a copy).
i64 TYtQueueReader::ReadTable(const TCluster& cluster, int tabletIndex, i64 rowFrom, i64 rowTo) {
    TString query = "MessageId, [$row_index], Timestamp, MessageType, Codec, Bytes, ExpireTimestamp";
    query += " FROM [" + Config_.GetTablePath() + "]";
    query += " WHERE [$tablet_index] = " + ToString(tabletIndex) + " ";
    query += " AND [$row_index] >= " + ToString(rowFrom);
    query += " AND [$row_index] < " + ToString(rowTo);
    if (Options_.CheckExpireTimestamp) {
        const TInstant maxExpireTime = ::Now() - TDuration::Seconds(Config_.GetAdditionalExpireTimeSec());
        query += " AND (ExpireTimestamp = NULL OR ExpireTimestamp > " + ToString(maxExpireTime.MilliSeconds()) + ")";
    }
    TProfileTimer started;
    TProfileTimer startedRel;
    DEBUG_LOG << LOG_PFX << "Start reading at " << cluster.PrintName << ": " << query << Endl;
    TDuration timeout = TDuration::MilliSeconds(Config_.GetReadTimeoutMSec());
    NYT::TErrorOr<NYT::NApi::TSelectRowsResult> result = cluster.YtClient->SelectRows(query).WithTimeout(timeout).Get();
    if (!result.IsOK()) {
        Counters_.NReadError.Inc();
        throw yexception() << "Failed to read from " << cluster.PrintName << ": " << ToString(result);
    }
    const auto& rows = result.Value().Rowset->GetRows();
    DEBUG_LOG << LOG_PFX << "Rows fetched from " << cluster.PrintName << ", total duration " << started.Get()
              << ", relative duration " << startedRel.Step() << Endl;
    // Column positions, in the order the columns are listed in the query.
    constexpr size_t cIdMessageId = 0;
    constexpr size_t cIdRowIndex = 1;
    constexpr size_t cIdTimestamp = 2;
    constexpr size_t cIdMessageType = 3;
    constexpr size_t cIdCodec = 4;
    constexpr size_t cIdBytes = 5;
    constexpr size_t cIdExpireTimestamp = 6;
    // One slot per fetched row; a slot is filled by the pool task only if the handler consumed the message.
    TVector<TMaybe<TYtQueueMessagePacked>> allMessages(rows.size());
    i64 rowIndexMax = -1;
    TYtQueueMessagePacked message;
    message.Origin.ClusterName = cluster.Name;
    message.Origin.TabletIndex = tabletIndex;
    std::atomic<size_t> numTasks = 1; // Make sure it does not go to zero until all tasks are added to the pool
    TAutoEvent syncBarrier; // TAutoEvent should be passed to other threads by value to prevent potential problems (see util/system/event.h)
    try {
        for (size_t rowIndex = 0; rowIndex < rows.size(); ++rowIndex) {
            const auto& row = rows[rowIndex];
            message.Origin.RowIndex = -1;

            CheckType(row[cIdRowIndex].Type, NYT::NTableClient::EValueType::Int64);
            message.Origin.RowIndex = row[cIdRowIndex].Data.Int64;

            CheckType(row[cIdMessageId].Type, NYT::NTableClient::EValueType::String);
            message.MessageId = row[cIdMessageId].AsString();

            // The timestamp must be known before the deduplication check: `message` is reused
            // between iterations, so otherwise the check would get a previous row's timestamp.
            CheckType(row[cIdTimestamp].Type, NYT::NTableClient::EValueType::Uint64);
            message.Timestamp = TInstant::MilliSeconds(row[cIdTimestamp].Data.Uint64);

            if (!DeduplicationService_.CheckIfKeyNewAndRememberIt(message.MessageId, message.Timestamp)) {
                Counters_.NRecordsDeduplicated.Inc();
                continue;
            }

            CheckType(row[cIdMessageType].Type, NYT::NTableClient::EValueType::String);
            message.MessageType = row[cIdMessageType].AsString();
            TRecordConverter* converterPtr = Converters_.FindPtr(message.MessageType);
            TRecordHandler* handlerPtr = Handlers_.FindPtr(message.MessageType);
            if (!converterPtr && !handlerPtr) {
                WARNING_LOG << LOG_PFX << "Unknown message type '" << message.MessageType << "'" << Endl;
                Counters_.NRecordsUnknown.Inc();
                continue;
            }

            if (row[cIdExpireTimestamp].Type == NYT::NTableClient::EValueType::Null) {
                message.ExpireTimestamp = TInstant();
            } else {
                CheckType(row[cIdExpireTimestamp].Type, NYT::NTableClient::EValueType::Uint64);
                message.ExpireTimestamp = TInstant::MilliSeconds(row[cIdExpireTimestamp].Data.Uint64);
            }

            CheckType(row[cIdCodec].Type, NYT::NTableClient::EValueType::Uint64);
            message.Codec = static_cast<EMessageCodec>(row[cIdCodec].Data.Uint64);

            CheckType(row[cIdBytes].Type, NYT::NTableClient::EValueType::String);
            message.BytesPacked = row[cIdBytes].AsString();

            ++numTasks;
            try {
                // Make lambda mutable to call a non-const TAutoEvent::Signal() method, and to modify message and handlerPtr
                ProcessThreads_.SafeAddFunc([this, rowIndex, message, converterPtr, handlerPtr, syncBarrier, &numTasks, &cluster, &allMessages]() mutable {
                    try {
                        TYtQueueMessage messageUnpacked;
                        UnpackMessage(message, messageUnpacked);
                        if (converterPtr) {
                            (*converterPtr)(messageUnpacked);
                            // The converter may change the message type; then the handler is looked up again.
                            if (message.MessageType != messageUnpacked.MessageType) {
                                handlerPtr = Handlers_.FindPtr(messageUnpacked.MessageType);
                            }
                            PackMessage(messageUnpacked, message);
                        }
                        if (handlerPtr) {
                            if (*handlerPtr) {
                                if ((*handlerPtr)(messageUnpacked)) {
                                    Counters_.NRecordsGood.Inc();
                                    allMessages[rowIndex] = message;
                                } else {
                                    Counters_.NRecordsNotConsumed.Inc();
                                }
                            } else {
                                Counters_.NRecordsNotConsumed.Inc();
                            }
                        }
                    } catch (...) {
                        ERROR_LOG << LOG_PFX << "Failed to read record at cluster " << cluster.PrintName << " at row "
                                  << message.Origin.RowIndex << ": " << CurrentExceptionMessage() << Endl;
                        Counters_.NRecordsBad.Inc();
                    }
                    if (--numTasks == 0) {
                        syncBarrier.Signal();
                    }
                });
            } catch (...) {
                // The task was never queued, so nobody else will undo the increment above.
                --numTasks;
                throw;
            }

            if (rowIndexMax < message.Origin.RowIndex) {
                rowIndexMax = message.Origin.RowIndex;
            }

            PerClusterCounters_.GetOrCreate({cluster.Name})->ActualizationLagSec = (Now() - message.Timestamp).Seconds();
            PerClusterCounters_.GetOrCreate({cluster.Name})->LastProcessedTimestamp = message.Timestamp;
        }
    } catch (...) {
        // Queued tasks reference numTasks, allMessages and cluster, which live in this frame:
        // they have to finish before the stack is unwound.
        if (--numTasks > 0) {
            syncBarrier.WaitI();
        }
        throw;
    }
    DEBUG_LOG << LOG_PFX << "Jobs scheduled for " << cluster.PrintName << ", total duration " << started.Get()
              << ", relative duration " << startedRel.Step() << Endl;
    // Release this thread's own "task" and wait for the pool only if something is still running.
    if (--numTasks > 0) {
        syncBarrier.WaitI();
    }
    TVector<TYtQueueMessagePacked> consumedMessages;
    for (auto& maybeMessage : allMessages) {
        if (maybeMessage) {
            consumedMessages.emplace_back(std::move(*maybeMessage));
        }
    }
    if (rowIndexMax >= 0) {
        message.Origin.RowIndex = rowIndexMax;
        DEBUG_LOG << LOG_PFX << "Writing " << consumedMessages.size() << " messages to cache" << Endl;
        Cache_.Write(std::move(consumedMessages), message.Origin);
    }
    with_lock (LastSuccessfulReadLock_) {
        LastSuccessfulRead_.Reset();
    }
    DEBUG_LOG << LOG_PFX << "Read from " << cluster.PrintName << ". Rows " << rows.size()
              << ", total duration " << started.Get()
              << ", relative duration " << startedRel.Step()
              << ", max row index is " << rowIndexMax << Endl;
    return rowIndexMax;
}

void TYtQueueReader::TrimTable(const TYtQueueMessageOrigin& recordOrigin) {
    auto it = Clusters_.find(recordOrigin.ClusterName);
    if (it == Clusters_.end()) {
        ERROR_LOG << LOG_PFX << "Unknown cluster in TrimTable: " << recordOrigin.ClusterName << Endl;
        Counters_.NInvalidTrims.Inc();
    } else {
        const auto& client = it->second.YtClient;
        if (!client) {
            ERROR_LOG << LOG_PFX << "YT client for cluster " << recordOrigin.ClusterName << " is dead" << Endl;
            Counters_.NInvalidTrims.Inc();
        } else {
            Cache_.Trim(recordOrigin);
            client->TrimTable(Config_.GetTablePath(), recordOrigin.TabletIndex, recordOrigin.RowIndex + 1);
        }
    }
}

// Fills `msgUnpacked` from a packed message: the payload is decompressed according to
// the message codec, all other fields are copied as is.
// Decompression goes first, so on failure `msgUnpacked` is left untouched.
void TYtQueueReader::UnpackMessage(const TYtQueueMessagePacked& msg, TYtQueueMessage& msgUnpacked) const {
    msgUnpacked.Bytes = Decompress(msg.Codec, msg.BytesPacked);

    // Identity
    msgUnpacked.MessageId = msg.MessageId;
    msgUnpacked.MessageType = msg.MessageType;
    msgUnpacked.Origin = msg.Origin;

    // Timing
    msgUnpacked.Timestamp = msg.Timestamp;
    msgUnpacked.ExpireTimestamp = msg.ExpireTimestamp;
}

// Fills `msg` from an unpacked message. The payload is stored uncompressed
// (codec MC_NONE); all other fields are copied as is.
void TYtQueueReader::PackMessage(const TYtQueueMessage& msgUnpacked, TYtQueueMessagePacked& msg) const {
    // Payload
    msg.Codec = MC_NONE; // TODO compress?
    msg.BytesPacked = msgUnpacked.Bytes;

    // Identity
    msg.MessageId = msgUnpacked.MessageId;
    msg.MessageType = msgUnpacked.MessageType;
    msg.Origin = msgUnpacked.Origin;

    // Timing
    msg.Timestamp = msgUnpacked.Timestamp;
    msg.ExpireTimestamp = msgUnpacked.ExpireTimestamp;
}

// Decompresses `bytes` according to `codec` and returns the result.
// The time spent is added to the MsgUnpackNs counter.
// Throws yexception for a codec that is not one of MC_NONE, MC_ZLIB, MC_ZSTD.
TString TYtQueueReader::Decompress(EMessageCodec codec, TString bytes) const {
    TString unpacked;
    TProfileTimer unpackTimer;
    if (codec == MC_NONE) {
        unpacked = bytes;
    } else if (codec == MC_ZLIB) {
        unpacked = ZLibDecompress(bytes, ZLib::StreamType::Auto);
    } else if (codec == MC_ZSTD) {
        unpacked = ZStdDecompress(bytes);
    } else {
        throw yexception() << "Unknown MessageCodec: " << static_cast<int>(codec);
    }
    Counters_.MsgUnpackNs += unpackTimer.Step().NanoSeconds();
    return unpacked;
}

// When InitializationTailSizeSec is configured, moves cluster.CurrentRow forward to the
// approximate row index that corresponds to "now - tail size - 1 minute", so the initial
// read covers only the tail of the table. CurrentRow is never moved backwards.
// Up to 10 attempts are made, one second apart; if all of them fail, the reader falls
// back to the regular initialization (CurrentRow is left as is).
//
// The pause between attempts waits on cluster.WakeUp and the stop flag is checked, so a
// failing cluster does not hold Stop() back for the whole retry budget.
void TYtQueueReader::MaybeInitCurrentRowFromYt(TCluster& cluster, int tabletIndex) {
    const auto initializationTailSizeSec = Config_.GetInitializationTailSizeSec();
    if (!initializationTailSizeSec) {
        return;
    }
    const auto initializationTailSize = TDuration::Seconds(initializationTailSizeSec);
    const int totalAttempts = 10;
    for (int attempt = 0; attempt < totalAttempts; attempt++) {
        if (StopFlag_) {
            return;
        }
        try {
            INFO_LOG << LOG_PFX << "Using initialization tail size " << initializationTailSize << " and going to get initial row index from yt" << Endl;
            auto borderTimestamp = Now() - initializationTailSize
                - TDuration::Minutes(1); // Adding one minute here because the bus is not strictly ordered by timestamp, so we read a little more and filter later
            auto initialRowIndex = FindApproximateLowerBoundByTimestamp(cluster, tabletIndex, borderTimestamp);
            INFO_LOG << LOG_PFX << "Got initial row index from yt: " << initialRowIndex << Endl;
            if (cluster.CurrentRow < initialRowIndex) {
                INFO_LOG << LOG_PFX << "Updating currentRow (" << cluster.CurrentRow << ") with initial row from yt: " << initialRowIndex << Endl;
                cluster.CurrentRow = initialRowIndex;
            } else {
                INFO_LOG << LOG_PFX << "Not using initial row from yt (" << initialRowIndex << ") because it is less than currentRow (" << cluster.CurrentRow << ")" << Endl;
            }
            return;
        } catch (...) {
            WARNING_LOG << LOG_PFX << "Error while calculating initialization tail size in cluster " << cluster.PrintName << " (attempt " << attempt + 1 << "/" << totalAttempts << "): "
                        << CurrentExceptionMessage() << Endl;
            ChangeClusterAlive(cluster, false);
            // Interruptible pause: Stop() signals WakeUp after raising the stop flag.
            cluster.WakeUp.WaitT(TDuration::Seconds(1));
        }
    }
    if (StopFlag_) {
        return;
    }
    ERROR_LOG << LOG_PFX << "Falling back to initialization without optimizations in " << cluster.PrintName << " after " << totalAttempts << " attempts " << Endl;
}

}// namespace NTravel
