package ru.yandex.travel.workflow;

import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.protobuf.Descriptors;
import com.google.protobuf.Message;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Timer;
import lombok.RequiredArgsConstructor;
import lombok.Value;
import lombok.experimental.NonFinal;
import lombok.extern.slf4j.Slf4j;
import org.springframework.dao.ConcurrencyFailureException;
import org.springframework.transaction.TransactionDefinition;
import org.springframework.transaction.support.DefaultTransactionDefinition;

import ru.yandex.bolts.collection.Option;
import ru.yandex.misc.lang.StringUtils;
import ru.yandex.travel.commons.concurrent.FutureUtils;
import ru.yandex.travel.commons.logging.NestedMdc;
import ru.yandex.travel.commons.metrics.MetricsUtils;
import ru.yandex.travel.commons.proto.ProtoUtils;
import ru.yandex.travel.spring.tx.ForcedRollbackTxManagerWrapper;
import ru.yandex.travel.spring.tx.ForcedRollbackTxTemplate;
import ru.yandex.travel.task_processor.PausableTerminationSemaphoreWithBuckets;
import ru.yandex.travel.tx.utils.TransactionMandatory;
import ru.yandex.travel.workflow.entities.Workflow;
import ru.yandex.travel.workflow.entities.WorkflowEvent;
import ru.yandex.travel.workflow.exceptions.ProcessingStoppedException;
import ru.yandex.travel.workflow.exceptions.RetryableException;
import ru.yandex.travel.workflow.exceptions.WorkflowCrashedException;
import ru.yandex.travel.workflow.exceptions.WorkflowIsNotRunningException;
import ru.yandex.travel.workflow.logging.EEventType;
import ru.yandex.travel.workflow.logging.TEventSolomonProperties;
import ru.yandex.travel.workflow.logging.TWorkflowLoggingEvent;
import ru.yandex.travel.workflow.repository.WorkflowRepository;

import static java.util.stream.Collectors.toMap;
import static ru.yandex.travel.commons.logging.CommonMdcParams.MDC_ENTITY_ID;
import static ru.yandex.travel.commons.logging.CommonMdcParams.MDC_ENTITY_TYPE;

/**
 * Event processing service that drains the event queue of a workflow.
 * <ul>
 * <li>A lock is obtained for each event.</li>
 * <li>Each event is processed by a handler chosen by the event handler matcher
 * (handler examples: state machine handler, reserve action handler).</li>
 * <li>After the event is handled, new events are added to the workflow queue.</li>
 * <li>The lock is released.</li>
 * </ul>
 */
@Slf4j
public class WorkflowProcessServiceV2 implements WorkflowProcessService {
    /** Outcomes for which {@code addProcessedEvents} records the per-event timer and counter. */
    private static final Set<EventProcessingResultType> MEASURED_OUTCOMES =
            Set.of(EventProcessingResultType.SUCCESS, EventProcessingResultType.CRASH, EventProcessingResultType.RETRY);
    /**
     * Duration boundaries from 100 ms up to 300 s.
     * NOTE(review): not referenced in the visible part of this file; presumably the service level objectives
     * of the per-event processing timers - confirm against the meter classes.
     */
    private static final Duration[] WORKFLOW_MESSAGE_PROCESSING_SLA = {
            Duration.ofMillis(100), Duration.ofMillis(200), Duration.ofMillis(300), Duration.ofMillis(400),
            Duration.ofMillis(500),
            Duration.ofSeconds(1), Duration.ofSeconds(2), Duration.ofSeconds(3), Duration.ofSeconds(4),
            Duration.ofSeconds(5),
            Duration.ofSeconds(10), Duration.ofSeconds(15), Duration.ofSeconds(30), Duration.ofSeconds(60),
            Duration.ofSeconds(90),
            Duration.ofSeconds(120), Duration.ofSeconds(150), Duration.ofSeconds(200), Duration.ofSeconds(250),
            Duration.ofSeconds(300)
    };
    /**
     * Duration boundaries from 1 ms up to 100 s, passed as service level objectives to the
     * "workflow.processing.scheduleTime" timers declared below.
     */
    private static final Duration[] SCHEDULE_DURATION_SLA = {
            Duration.ofMillis(1), Duration.ofMillis(10),
            Duration.ofMillis(20), Duration.ofMillis(30), Duration.ofMillis(40), Duration.ofMillis(50),
            Duration.ofMillis(100), Duration.ofMillis(200), Duration.ofMillis(300), Duration.ofMillis(400),
            Duration.ofMillis(500),
            Duration.ofMillis(1000), Duration.ofMillis(10000), Duration.ofMillis(100000),
    };
    // --- Collaborators and settings, all assigned in the constructor ---
    private final WorkflowRepository workflowRepository;
    private final WorkflowEventHandlerMatcher workflowEventHandlerMatcher;
    private final MessagingContextFactory messagingContextFactory;
    // Fixed-size pool that runs the drain tasks; sized as the sum of the threads of all processing pools.
    private final ExecutorService workflowProcessingPool;
    // Semaphore with one bucket of permits per processing pool id.
    private final PausableTerminationSemaphoreWithBuckets pausableSemaphore;
    private final WorkflowEventQueue workflowEventQueue;
    // Template configured with PROPAGATION_REQUIRES_NEW and ISOLATION_REPEATABLE_READ.
    private final ForcedRollbackTxTemplate transactionOperation;
    private final WorkflowEventRetryStrategy workflowEventRetryStrategy;
    private final Duration concurrencyFailureMaxTimeout;
    private final int concurrencyFailureRetryCount;
    // Upper bound on attempts for retryable exceptions that carry their own wait duration.
    private final int delayedRetriesLimit;
    // Pool id used when a workflow has no pool id or its pool id has no bucket in the semaphore.
    private final int defaultProcessingPoolId;
    // workflow id -> (event id -> future) for events enqueued via scheduleEventWithLocalTracking.
    private final ConcurrentMap<UUID, ConcurrentHashMap<Long, CompletableFuture<Void>>> pendingEvents =
            new ConcurrentHashMap<>();
    // Lazily created meters, keyed by (entity type, outcome, message class name).
    private final ConcurrentMap<EventProcessingMeterKey, EventProcessingMeter> eventProcessingMeters =
            new ConcurrentHashMap<>();
    // Same as above, with the additional solomon tag of the message in the key.
    private final ConcurrentMap<EventProcessingTaggedMeterKey, EventProcessingTaggedMeter> eventProcessingTaggedMeters =
            new ConcurrentHashMap<>();
    // Ids of workflows that currently have a drain task submitted or running (see scheduleDrainEventQueue).
    private final ConcurrentSkipListSet<UUID> lockedWorkflows;
    // Single daemon thread that periodically invokes schedulePendingWorkflows.
    private final ScheduledExecutorService scheduleExecutor;
    private final PendingWorkflowsFetcher pendingWorkflowsFetcher;
    // Per-pool counters backing the "orchestrator.processingPools.*" gauges; refreshed by updateMetrics.
    private final ProcessingMetrics metrics;
    // Incremented when a drain task fails with an exception that escaped drainEventQueue.
    private final Counter severeErrorCounter =
            Counter.builder("workflow.processing.severeError").register(Metrics.globalRegistry);
    // Two timers sharing one metric name, distinguished by the "outcome" tag.
    // NOTE(review): presumably recorded by schedulePendingWorkflows, which is outside this part of the file.
    private final Timer workflowsSchedulingTimerSuccess =
            Timer.builder("workflow.processing.scheduleTime").tag("outcome", "success")
                    .publishPercentileHistogram(true)
                    .serviceLevelObjectives(SCHEDULE_DURATION_SLA)
                    .publishPercentiles(MetricsUtils.higherPercentiles())
                    .register(Metrics.globalRegistry);
    private final Timer workflowsSchedulingTimerFailure =
            Timer.builder("workflow.processing.scheduleTime").tag("outcome", "failure")
                    .publishPercentileHistogram(true)
                    .serviceLevelObjectives(SCHEDULE_DURATION_SLA)
                    .publishPercentiles(MetricsUtils.higherPercentiles())
                    .register(Metrics.globalRegistry);
    // Optional listener; processOneEvent null-checks it before use.
    private final WorkflowProcessingListener workflowProcessingListener;

    /**
     * Creates the service and immediately starts the periodic polling for pending workflows.
     * <p>
     * The processing pools are read from {@code workflowProcessingProperties}; the configured default pool id
     * must refer to one of them. A single fixed thread pool sized as the sum of all pool threads executes the
     * drain tasks, while a bucketed semaphore limits how many of them run per pool.
     *
     * @throws IllegalArgumentException if no pool with the configured default pool id exists
     */
    public WorkflowProcessServiceV2(WorkflowEventHandlerMatcher workflowEventHandlerMatcher,
                                    WorkflowRepository workflowRepository,
                                    WorkflowEventQueue workflowEventQueue,
                                    ForcedRollbackTxManagerWrapper forcedRollbackTxManagerWrapper,
                                    MessagingContextFactory messagingContextFactory,
                                    WorkflowEventRetryStrategy workflowEventRetryStrategy,
                                    WorkflowProcessingListener workflowProcessingListener,
                                    PendingWorkflowsFetcher pendingWorkflowsFetcher,
                                    WorkflowProcessingProperties workflowProcessingProperties) {
        // Collaborators that are stored as they are.
        this.workflowEventHandlerMatcher = workflowEventHandlerMatcher;
        this.workflowRepository = workflowRepository;
        this.workflowEventQueue = workflowEventQueue;
        this.messagingContextFactory = messagingContextFactory;
        this.workflowEventRetryStrategy = workflowEventRetryStrategy;
        this.workflowProcessingListener = workflowProcessingListener;
        this.pendingWorkflowsFetcher = pendingWorkflowsFetcher;
        this.lockedWorkflows = new ConcurrentSkipListSet<>();

        // Processing pools and the mandatory default pool.
        var poolsConfig = workflowProcessingProperties.getProcessingPools();
        List<Pool> pools = poolsConfig.getPools().entrySet().stream()
                .map(poolEntry -> {
                    var poolProperties = poolEntry.getValue();
                    return Pool.builder()
                            .id(poolEntry.getKey())
                            .name(poolProperties.getName())
                            .threads(poolProperties.getThreads())
                            .entityTypes(poolProperties.getEntityTypes())
                            .build();
                })
                .collect(Collectors.toUnmodifiableList());
        int defaultPoolId = poolsConfig.getDefaultPoolId();
        Preconditions.checkArgument(pools.stream().anyMatch(pool -> pool.getId() == defaultPoolId),
                "Default pool with id %s not found", defaultPoolId);
        this.defaultProcessingPoolId = defaultPoolId;

        // Every unit of work runs in its own new transaction with repeatable-read isolation.
        DefaultTransactionDefinition txDefinition = new DefaultTransactionDefinition();
        txDefinition.setName("WorkflowProcessService");
        txDefinition.setPropagationBehavior(TransactionDefinition.PROPAGATION_REQUIRES_NEW);
        txDefinition.setIsolationLevel(TransactionDefinition.ISOLATION_REPEATABLE_READ);
        this.transactionOperation = new ForcedRollbackTxTemplate(forcedRollbackTxManagerWrapper, txDefinition);

        // One shared thread pool for all processing pools; the semaphore buckets enforce the per-pool limits.
        int totalThreads = pools.stream().mapToInt(Pool::getThreads).sum();
        this.workflowProcessingPool = Executors.newFixedThreadPool(totalThreads,
                new ThreadFactoryBuilder().setNameFormat("WorkflowProcessPool-%s").build());
        this.pausableSemaphore = new PausableTerminationSemaphoreWithBuckets("WorkflowProcessServiceBucketedSemaphore",
                pools.stream().collect(toMap(Pool::getId, Pool::getThreads)));

        var concurrencyFailure = workflowProcessingProperties.getConcurrencyFailure();
        this.concurrencyFailureMaxTimeout = concurrencyFailure.getMaxTimeout();
        this.concurrencyFailureRetryCount = concurrencyFailure.getRetryCount();
        this.delayedRetriesLimit = workflowProcessingProperties.getDelayedRetriesLimit();

        Duration pollingInterval = workflowProcessingProperties.getPendingWorkflowPollingInterval();

        this.metrics = registerMetrics(pools);

        this.scheduleExecutor = Executors.newSingleThreadScheduledExecutor(
                new ThreadFactoryBuilder().setDaemon(true).setNameFormat("WPS-Scheduler").build());
        // TODO (mbobrov): customize it, so it can be started from application
        this.scheduleExecutor.scheduleAtFixedRate(this::schedulePendingWorkflows, 0L,
                pollingInterval.toMillis(), TimeUnit.MILLISECONDS);
    }

    /**
     * Creates one "in process" and one "pending" counter per processing pool and registers the
     * {@code orchestrator.processingPools.*} gauges (tagged with the pool id) in the global registry.
     *
     * @param processingPools pools to create the counters and gauges for
     * @return holder of the per-pool counters that back the registered gauges
     */
    private ProcessingMetrics registerMetrics(List<Pool> processingPools) {
        ImmutableMap.Builder<Integer, AtomicInteger> inProcessByPool = ImmutableMap.builder();
        ImmutableMap.Builder<Integer, AtomicInteger> pendingByPool = ImmutableMap.builder();
        processingPools.forEach(pool -> {
            int poolId = pool.getId();
            inProcessByPool.put(poolId, new AtomicInteger(0));
            pendingByPool.put(poolId, new AtomicInteger(0));
        });
        ImmutableMap<Integer, AtomicInteger> inProcessWorkflows = inProcessByPool.build();
        ImmutableMap<Integer, AtomicInteger> pendingWorkflows = pendingByPool.build();

        for (Pool pool : processingPools) {
            int poolId = pool.getId();
            String poolTag = String.valueOf(poolId);
            // The maps are immutable, so the counters can be resolved once and captured by the gauges.
            AtomicInteger inProcessCounter = inProcessWorkflows.get(poolId);
            AtomicInteger pendingCounter = pendingWorkflows.get(poolId);
            int threadLimit = pool.getThreads();

            Gauge.builder("orchestrator.processingPools.workflowsInProcess", () -> inProcessCounter)
                    .tag("pool", poolTag)
                    .register(Metrics.globalRegistry);
            Gauge.builder("orchestrator.processingPools.pendingWorkflows", () -> pendingCounter)
                    .tag("pool", poolTag)
                    .register(Metrics.globalRegistry);
            Gauge.builder("orchestrator.processingPools.maxWorkflows", () -> threadLimit)
                    .tag("pool", poolTag)
                    .register(Metrics.globalRegistry);
        }
        return new ProcessingMetrics(inProcessWorkflows, pendingWorkflows);
    }

    /**
     * Stops the service: shuts down the polling scheduler, the processing pool and the semaphore, waits for the
     * semaphore termination and finally fails every future still tracked in {@code pendingEvents} with a
     * {@link ProcessingStoppedException}.
     *
     * @throws Exception NOTE(review): presumably propagated from the semaphore's {@code awaitTermination} - confirm
     */
    @Override
    public void stop() throws Exception {
        log.info("Destroying workflow process service");
        // No more periodic polling for pending workflows.
        scheduleExecutor.shutdown();
        // Orderly shutdown: new drain tasks are rejected by the executor from here on.
        workflowProcessingPool.shutdown();
        pausableSemaphore.shutdown();
        // NOTE(review): presumably blocks until all permits are returned - confirm against the semaphore class.
        pausableSemaphore.awaitTermination();
        resolveWaitingFuturesExceptionally();
        log.info("Destroyed workflow process service");
    }

    /**
     * Enqueues the given message into the queue of the workflow and logs the created event.
     * The method only enqueues the event; it does not submit a drain task itself.
     *
     * @param workflowId id of the recipient workflow, must not be null
     * @param eventData  message to enqueue
     */
    @Override
    @TransactionMandatory
    public void scheduleEvent(UUID workflowId, Message eventData) {
        Preconditions.checkNotNull(workflowId, "Workflow id must be not null");
        WorkflowEvent enqueued = workflowEventQueue.enqueueMessage(workflowId, eventData);
        Long enqueuedId = enqueued.getId();
        String messageClassName = enqueued.getData().getClass().getSimpleName();
        UUID recipientId = enqueued.getWorkflowId();
        log.info("Enqueued event {} of class '{}' sent externally to workflow {}",
                enqueuedId, messageClassName, recipientId);
    }

    /**
     * Loads the workflow and resolves the processing pool it is served by.
     * Should be called only inside a transaction.
     *
     * @param workflowId id of the workflow to look up
     * @return effective processing pool id of the workflow
     */
    private int getProcessingPoolId(UUID workflowId) {
        // some caching can be applied here later
        return getProcessingPoolId(workflowRepository.getOne(workflowId));
    }

    /**
     * Resolves the processing pool of an already loaded workflow.
     *
     * @param workflow workflow whose stored pool id is mapped to an effective one
     * @return effective processing pool id of the workflow
     */
    private int getProcessingPoolId(Workflow workflow) {
        Integer storedPoolId = workflow.getProcessingPoolId();
        return getEffectiveProcessingPoolId(storedPoolId);
    }

    /**
     * Maps a stored pool id to a pool that actually exists.
     *
     * @param poolId stored pool id, may be null
     * @return {@code poolId} when the semaphore has a bucket for it, otherwise the default pool id
     */
    private int getEffectiveProcessingPoolId(Integer poolId) {
        if (poolId == null || !pausableSemaphore.hasBucket(poolId)) {
            return defaultProcessingPoolId;
        }
        return poolId;
    }

    /**
     * Enqueues the message for a running workflow, registers a future for the created event in
     * {@code pendingEvents} and schedules draining of the workflow queue.
     *
     * @param workflowId id of the recipient workflow, must not be null
     * @param eventData  message to enqueue
     * @return A future which is completed once the message is processed.
     * The future is completed exceptionally if the workflow processing service is stopped or paused.
     * The future is completed exceptionally if the workflow crashed before or during the message handling.
     * Method is intended for testing purposes only.
     */
    @VisibleForTesting
    public CompletableFuture<Void> scheduleEventWithLocalTracking(UUID workflowId, Message eventData) {
        Preconditions.checkArgument(workflowId != null, "Workflow id must be not null");
        // The pool id is resolved in its own transaction, before any permit is taken.
        //noinspection ConstantConditions
        int poolId = transactionOperation.execute(txStatus -> getProcessingPoolId(workflowId));
        // This permit is held until the finally block below, i.e. for the whole enqueue-and-schedule step.
        boolean semaphoreAcquired = pausableSemaphore.acquire(poolId);
        if (!semaphoreAcquired) {
            return FutureUtils.buildExceptional(new ProcessingStoppedException());
        }
        // Remembers the event id whose future was put into `pendingEvents`, for cleanup on failure.
        AtomicReference<Optional<Long>> maybeEventId = new AtomicReference<>(Optional.empty());
        try {
            // Try to enqueue the message and commit the transaction.
            // Note that we are changing the `pendingEvents` mapping inside the `transactionOperation` action,
            // but the transaction rollback does not automatically revert the changes in the mapping.
            // Thus, we are memoizing the touched key in the `maybeEventId` and clear the key in the exceptional case.
            CompletableFuture<Void> resultingFuture = transactionOperation.execute((tStatus) -> {
                Optional<WorkflowEvent> maybeEvent = workflowEventQueue.enqueueMessageOnlyIfRunning(workflowId,
                        eventData);
                if (maybeEvent.isPresent()) {
                    Long eventId = maybeEvent.get().getId();
                    maybeEventId.set(Optional.of(eventId));
                    CompletableFuture<Void> newFuture = new CompletableFuture<>();
                    // Atomically create the per-workflow map if needed and register the future in it.
                    pendingEvents.compute(workflowId, (ignored, idToFuture) -> {
                        if (idToFuture == null) {
                            idToFuture = new ConcurrentHashMap<>();
                        }
                        CompletableFuture<Void> oldFuture = idToFuture.put(eventId, newFuture);
                        Preconditions.checkState(oldFuture == null, "Duplicate event id");
                        return idToFuture;
                    });
                    return newFuture;
                } else {
                    // Nothing was enqueued, so there is nothing to track.
                    return FutureUtils.buildExceptional(new WorkflowIsNotRunningException());
                }
            });
            // Here, the transaction is committed (thus there is an event in the workflow queue);
            // so we are scheduling the work.
            scheduleDrainEventQueue(workflowId, poolId);
            return resultingFuture;
        } catch (Throwable ex) {
            // Undo the registration made above; drop the per-workflow map once it becomes empty.
            maybeEventId.get().ifPresent(dirtyEventId -> pendingEvents.computeIfPresent(workflowId, (ignored,
                                                                                                     idToFuture) -> {
                idToFuture.remove(dirtyEventId);
                return idToFuture.isEmpty() ? null : idToFuture;
            }));
            throw ex;
        } finally {
            // Return the permit taken at the top of the method; the drain task holds its own permit.
            pausableSemaphore.release(poolId);
        }
    }

    /**
     * Submits a task that drains the event queue of the workflow, unless the semaphore refuses a permit
     * or the workflow already has a drain task submitted or running.
     * <p>
     * The permit of the given pool and the entry in {@code lockedWorkflows} are held for the lifetime of
     * the submitted task and released in its {@code finally} block. If the executor does not accept the task
     * (for example, it has already been shut down), both are released here and the exception is rethrown,
     * so that neither the permit nor the workflow lock is leaked.
     *
     * @param workflowId       id of the workflow to drain, must not be null
     * @param processingPoolId id of the semaphore bucket to take the permit from
     */
    private void scheduleDrainEventQueue(UUID workflowId, int processingPoolId) {
        Preconditions.checkArgument(workflowId != null, "Workflow id must be not null");
        if (!pausableSemaphore.acquire(processingPoolId)) {
            log.debug("Skipping schedule drain event queue, as we could not acquire semaphore permit");
            return;
        }
        if (!lockedWorkflows.add(workflowId)) {
            // Another drain task owns this workflow; give the permit back.
            pausableSemaphore.release(processingPoolId);
            log.debug("Skipping drain event queue for workflow {} as it is already active", workflowId);
            return;
        }
        log.debug("Scheduling drain event queue for workflow {}", workflowId);
        try {
            workflowProcessingPool.execute(() -> {
                try {
                    drainEventQueue(workflowId);
                } catch (Exception e) {
                    log.error("Severe error occurred while processing workflow {}", workflowId, e);
                    severeErrorCounter.increment();
                } finally {
                    lockedWorkflows.remove(workflowId);
                    pausableSemaphore.release(processingPoolId);
                }
            });
        } catch (RuntimeException e) {
            // The task was not accepted (e.g. RejectedExecutionException after shutdown), so its finally block
            // will never run: undo the bookkeeping here before propagating the failure.
            lockedWorkflows.remove(workflowId);
            pausableSemaphore.release(processingPoolId);
            throw e;
        }
    }

    /**
     * Pauses the semaphore and then, whether or not pausing succeeded, fails every future tracked in
     * {@code pendingEvents} with a {@link ProcessingStoppedException}.
     * An interruption while pausing is logged and the interrupted status of the thread is restored.
     */
    @Override
    public synchronized void pauseAll() {
        try {
            pausableSemaphore.pause();
        } catch (InterruptedException e) {
            log.error("Pausing interrupted", e);
            Thread.currentThread().interrupt(); // preserve interrupted status
        } finally {
            // Runs even when pausing was interrupted.
            resolveWaitingFuturesExceptionally();
        }
    }

    /**
     * Empties {@code pendingEvents} and completes every tracked future exceptionally with a
     * {@link ProcessingStoppedException}. Repeats until the map is observed empty, so entries added
     * while the sweep is running are handled too.
     */
    private void resolveWaitingFuturesExceptionally() {
        while (!pendingEvents.isEmpty()) {
            for (UUID workflowId : pendingEvents.keySet()) {
                // Detach the whole per-workflow map first; a concurrent remover may have won the race.
                ConcurrentHashMap<Long, CompletableFuture<Void>> detached = pendingEvents.remove(workflowId);
                if (detached != null) {
                    detached.forEach((eventId, future) -> {
                        log.info("Completing the future for workflow {} and event {} as the event processing was stopped"
                                , workflowId, eventId);
                        future.completeExceptionally(new ProcessingStoppedException());
                    });
                }
            }
        }
    }

    /**
     * Returns the value reported by {@code pausableSemaphore.getPermits()}.
     * NOTE(review): {@code updateMetrics} uses {@code getPermitsInUse(poolId)} for the per-pool in-process
     * gauges; confirm that {@code getPermits()} means permits in use rather than permits still available.
     */
    @Override
    public int getWorkflowsInProcessCount() {
        return pausableSemaphore.getPermits();
    }

    /**
     * Returns the value reported by {@code pausableSemaphore.getMaxPermits()}.
     * NOTE(review): presumably the total number of permits over all pool buckets - confirm.
     */
    @Override
    public int getMaxWorkflows() {
        return pausableSemaphore.getMaxPermits();
    }

    /**
     * Counts the workflows waiting to be scheduled, passing a snapshot of the currently locked workflow ids
     * to the repository query.
     *
     * @return 0 while the semaphore is not active, otherwise the count reported by the repository
     */
    @TransactionMandatory
    @Override
    public int getPendingWorkflowsCount() {
        if (pausableSemaphore.isActive()) {
            Set<UUID> lockedSnapshot = Set.copyOf(lockedWorkflows);
            return workflowRepository.countWorkflowsToBeScheduled(lockedSnapshot);
        }
        return 0;
    }

    /**
     * Resumes the semaphore after a pause. Synchronized on this instance, like {@code pauseAll}, so the two
     * never run concurrently.
     */
    @Override
    public synchronized void resume() {
        log.info("Resuming work");
        pausableSemaphore.resume();
    }

    /**
     * Refreshes the per-pool counters behind the processing pool gauges: the in-process values come from the
     * semaphore, the pending values from the repository.
     */
    @TransactionMandatory
    @Override
    public void updateMetrics() {
        // in-process workflows
        metrics.getInProcessWorkflowsMetric().forEach(
                (poolId, inProcessCounter) -> inProcessCounter.set(pausableSemaphore.getPermitsInUse(poolId)));

        // pending workflows
        Map<Integer, Integer> storedPoolCounts = workflowRepository.countWorkflowsToBeScheduledPerPoolId(Set.of());
        Map<Integer, Integer> effectivePoolCounts = new HashMap<>();
        for (Map.Entry<Integer, Integer> storedCount : storedPoolCounts.entrySet()) {
            // null or outdated ids are dynamically re-mapped
            int effectivePoolId = getEffectiveProcessingPoolId(storedCount.getKey());
            effectivePoolCounts.merge(effectivePoolId, storedCount.getValue(), Integer::sum);
        }
        metrics.getPendingWorkflowsMetric().forEach(
                (poolId, pendingCounter) -> pendingCounter.set(effectivePoolCounts.getOrDefault(poolId, 0)));
    }

    /**
     * Processes the events of the workflow one by one until an event is not processed successfully or the
     * semaphore stops being active.
     * <p>
     * After every processed event the method records the metrics, schedules draining of the other workflows
     * that received messages and completes the futures registered by {@code scheduleEventWithLocalTracking}.
     * Futures are always completed outside of the {@code pendingEvents} map operations, so that callbacks
     * attached to a future never run while the map is locked by a remapping function.
     *
     * @param workflowId id of the workflow whose queue is drained
     */
    private void drainEventQueue(UUID workflowId) {
        log.debug("Draining events for workflow {}", workflowId);

        long totalStartedAt = System.currentTimeMillis();
        long totalCount = 0;

        boolean draining = true;
        while (draining && pausableSemaphore.isActive()) {
            long startedAt = System.nanoTime();

            EventProcessingResult result = processOneEvent(workflowId);
            // Here, the processing transaction is committed already.
            // Thus, here we are only doing things that do not require any DB access.

            // (1) Record processing time and counters.
            addProcessedEvents(result, startedAt);

            // (2) Schedule more drain runnables.
            for (WorkflowToNotify workflowToNotify : result.workflowsToNotify) {
                if (!workflowId.equals(workflowToNotify.getWorkflowId())) {
                    scheduleDrainEventQueue(workflowToNotify.getWorkflowId(), workflowToNotify.getProcessingPoolId());
                }
            }

            // (3) Complete pending futures.
            switch (result.type) {
                case SUCCESS: {
                    totalCount += 1;
                    // The race between the code here and the `scheduleEventWithLocalTracking` method should be resolved
                    // by the concurrent map implementation which will linearize calls to the compute* methods.
                    // Only the bookkeeping happens inside the remapping function; the future is completed afterwards,
                    // because its dependent actions run synchronously and must not execute under the map's lock.
                    AtomicReference<CompletableFuture<Void>> processedFuture = new AtomicReference<>();
                    pendingEvents.computeIfPresent(workflowId, (ignored, idToFuture) -> {
                        processedFuture.set(idToFuture.remove(result.eventId));
                        return idToFuture.isEmpty() ? null : idToFuture;
                    });
                    CompletableFuture<Void> future = processedFuture.get();
                    if (future != null) {
                        future.complete(null);
                    }
                    break;
                }
                case CRASH: {
                    totalCount += 1;
                    // Since the workflow has crashed and the crashed state is committed already,
                    // the `scheduleEventWithLocalTracking` method will not insert any new futures into the mapping.
                    // Thus we can simply fulfill all the pending futures and clean up.
                    ConcurrentHashMap<Long, CompletableFuture<Void>> idToFuture = pendingEvents.remove(workflowId);
                    while (idToFuture != null && !idToFuture.isEmpty()) {
                        for (Map.Entry<Long, CompletableFuture<Void>> entry : idToFuture.entrySet()) {
                            log.info("Completing the future for event {} of workflow {} as the workflow has crashed",
                                    entry.getKey(), workflowId);
                            entry.getValue().completeExceptionally(new WorkflowCrashedException());
                            idToFuture.remove(entry.getKey());
                        }
                    }
                    break;
                }
                default:
                    // Other outcomes complete no futures here.
                    break;
            }

            // (4) Keep draining only while events are processed successfully.
            draining = result.type == EventProcessingResultType.SUCCESS;
        }

        log.debug("Drained {} events in {}ms", totalCount, System.currentTimeMillis() - totalStartedAt);
    }

    /**
     * Records the processing time and count of a handled event; outcomes outside of
     * {@code MEASURED_OUTCOMES} are ignored. Messages that declare a {@code solomonProperties} field
     * additionally increment a counter keyed by the tag stored in that field.
     *
     * @param result    outcome of the event processing
     * @param startedAt {@link System#nanoTime()} value taken before the processing started
     */
    private void addProcessedEvents(EventProcessingResult result, long startedAt) {
        if (!MEASURED_OUTCOMES.contains(result.type)) {
            return;
        }
        Message message = result.eventData;
        String messageClassName = message.getClass().getSimpleName();

        var meterKey = new EventProcessingMeterKey(result.entityType, result.type, messageClassName);
        EventProcessingMeter meter = eventProcessingMeters.computeIfAbsent(meterKey, EventProcessingMeter::new);
        meter.executeTimer.record(System.nanoTime() - startedAt, TimeUnit.NANOSECONDS);
        meter.counter.increment();

        // Record counters for solomon-tagged messages
        Descriptors.FieldDescriptor solomonField =
                message.getDescriptorForType().findFieldByName("solomonProperties");
        if (solomonField == null) {
            return;
        }
        TEventSolomonProperties solomonProperties = (TEventSolomonProperties) message.getField(solomonField);
        var taggedMeterKey = new EventProcessingTaggedMeterKey(result.entityType, result.type,
                messageClassName, solomonProperties.getTag());
        EventProcessingTaggedMeter taggedMeter = eventProcessingTaggedMeters.computeIfAbsent(
                taggedMeterKey, EventProcessingTaggedMeter::new);
        taggedMeter.counter.increment();
    }

    /**
     * Processes next unprocessed event for workflow with given uuid
     *
     * @return has more events and processed event id
     */
    private EventProcessingResult processOneEvent(UUID workflowId) {
        AtomicReference<String> prettyLogMarker = new AtomicReference<>("non-existing event");
        long startedAtNannos = System.nanoTime();
        EventProcessingResult result = transactionOperation.execute(status -> {
            // we register WF_EVENTS_LOGGER, so some events can be logged to it after transaction
            Workflow workflow = workflowRepository.getOne(workflowId);
            if (workflow.getState() != EWorkflowState.WS_RUNNING) {
                return EventProcessingResult.ofNotRunning();
            }

            Optional<WorkflowEvent> maybeEvent = workflowEventQueue.peekEvent(workflowId);
            if (maybeEvent.isEmpty()) {
                return EventProcessingResult.ofNoEvent();
            }

            WorkflowEvent event = maybeEvent.get();
            Long eventId = event.getId();
            int eventAttempt = event.getTimesTried();
            Message eventData = event.getData();
            String entityType = workflow.getEntityType();
            String workflowEntityId = workflow.getEntityId() != null ? workflow.getEntityId().toString() : "null";
            prettyLogMarker.set(String.format("event %s of class '%s'",
                    eventId,
                    eventData.getClass().getSimpleName()));

            try (var ignored = NestedMdc.nestedMdc(
                    Map.of("WorkflowId", workflowId.toString(), MDC_ENTITY_ID, workflowEntityId,
                            MDC_ENTITY_TYPE, workflow.getEntityType())
            )) {
                try {
                    try {
                        log.info("Processing {}", prettyLogMarker.get());
                        if (workflowProcessingListener != null) {
                            workflowProcessingListener.onEventStartProcessing(workflowId, eventId, eventData);
                        }
                        HandleWorkflowEventResult handleResult = handleWorkflowEvent(workflow, event);
                        List<UUID> workflowIdsToNotify = new ArrayList<>();
                        for (WorkflowMessagePair scheduledMessage : handleResult.getMessagePairs()) {
                            WorkflowEvent createdEvent =
                                    workflowEventQueue.enqueueMessage(scheduledMessage.getRecipientWorkflowId(),
                                            scheduledMessage.getMessage());
                            log.info("Enqueued event {} of class '{}' sent from workflow {} to workflow {}",
                                    createdEvent.getId(),
                                    createdEvent.getData().getClass().getSimpleName(),
                                    workflowId,
                                    createdEvent.getWorkflowId());
                            workflowIdsToNotify.add(createdEvent.getWorkflowId());
                        }
                        List<WorkflowToNotify> workflowsToNotify = workflowRepository.findAllById(workflowIdsToNotify)
                                .stream().map(wf -> new WorkflowToNotify(wf.getId(), getProcessingPoolId(wf)))
                                .collect(Collectors.toList());

                        workflowEventQueue.dequeueAsProcessed(event);
                        workflowRepository.flush();
                        // see the notes to the flush call in TrainToGenericMigrationProcessor.migrate
                        // for more information on wh we have to perform it two times before the final commit
                        status.flush();
                        return EventProcessingResult.ofSuccess(eventId, entityType, eventData, workflowsToNotify);
                    } catch (RuntimeException e) {
                        throw transactionOperation.translateIfPossibleAndRethrow(e);
                    }
                } catch (ConcurrencyFailureException cex) {
                    log.error("Concurrency failure exception with the following message '{}', " +
                            "occurred while processing {}", cex.getMessage(), prettyLogMarker.get(), cex);
                    status.setRollbackOnly();
                    return handleConcurrencyFailure(workflow, eventId, eventAttempt, eventData, entityType, cex);
                } catch (RetryableException ex) {
                    log.error("Retryable exception occurred while processing {}", prettyLogMarker.get(), ex);
                    status.setRollbackOnly();
                    if (ex.getWaitDuration() != null) {
                        if (eventAttempt < delayedRetriesLimit) {
                            return EventProcessingResult.ofRetry(eventId, entityType, eventData, ex,
                                    ex.getWaitDuration());
                        } else {
                            return EventProcessingResult.ofCrash(eventId, entityType, eventData, ex);
                        }
                    } else if (workflowEventRetryStrategy.shouldRetry(eventAttempt, ex)) {
                        Duration sleepDuration = workflowEventRetryStrategy.getWaitDuration(eventAttempt, ex);
                        return EventProcessingResult.ofRetry(eventId, entityType, eventData, ex, sleepDuration);
                    } else {
                        return EventProcessingResult.ofCrash(eventId, entityType, eventData, ex);
                    }
                } catch (Exception ex) {
                    log.error("Unhandled exception occurred while processing {}", prettyLogMarker.get(), ex);
                    SentryHelper.reportCrashExceptionToSentry(ex, "WorkflowProcessService");
                    status.setRollbackOnly();

                    return EventProcessingResult.ofCrash(eventId, entityType, eventData, ex);
                }
            }
        });

        Preconditions.checkNotNull(result, "Event processing result must not be null");

        switch (result.type) {
            case RETRY:
                log.info("Retrying processing of {} due to the exception of class '{}'",
                        prettyLogMarker.get(), result.exception.getClass().getSimpleName());
                try {
                    transactionOperation.execute(status -> {
                        WorkflowEvent event = workflowEventQueue.getEvent(workflowId, result.eventId);
                        Workflow workflow = workflowRepository.getOne(workflowId);

                        workflowEventQueue.registerForRetry(event);
                        workflow.sleepFor(result.sleepDuration);

                        return null;
                    });
                } catch (Throwable t) {
                    log.error("Unhandled exception while handling the retryable exception in the workflow", t);
                    throw t;
                }
                break;
            case CRASH:
                log.info("Crashing processing of {} due to the exception of class '{}'",
                        prettyLogMarker.get(), result.exception.getClass().getSimpleName());
                try {
                    transactionOperation.execute(status -> {
                        WorkflowEvent event = workflowEventQueue.getEvent(workflowId, result.eventId);
                        Workflow workflow = workflowRepository.getOne(workflowId);

                        workflowEventQueue.dequeueAsProcessingError(event);
                        workflow.transitionTo(EWorkflowState.WS_CRASHED,
                                ProtoUtils.errorFromThrowable(result.exception, true));

                        TWorkflowLoggingEvent.Builder eventBuilder = TWorkflowLoggingEvent.newBuilder()
                                .setType(EEventType.ET_WORKFLOW_CRASHED)
                                .setWorkflowId(workflow.getId().toString())
                                .setHappenedAt(ProtoUtils.fromInstant(Instant.now()));
                        if (workflow.getEntityId() != null) {
                            eventBuilder.setEntityId(workflow.getEntityId().toString());
                        }
                        if (workflow.getEntityType() != null) {
                            eventBuilder.setEntityType(workflow.getEntityType());
                        }
                        AfterCommitEventLogging.logEvent(WF_EVENTS_LOGGER, eventBuilder.build());

                        if (workflow.getSupervisorId() != null) {
                            WorkflowEvent createdEvent = workflowEventQueue.enqueueMessage(
                                    workflow.getSupervisorId(),
                                    TWorkflowCrashed.newBuilder()
                                            .setWorkflowId(workflowId.toString())
                                            .setEntityId(workflow.getEntityId().toString())
                                            .setEntityType(workflow.getEntityType())
                                            .setHappenedAt(ProtoUtils.fromInstant(Instant.now()))
                                            .setEventId(event.getId())
                                            .build());
                            log.info("Enqueued event {} of class '{}' sent from workflow {} to workflow {}",
                                    createdEvent.getId(),
                                    createdEvent.getData().getClass().getSimpleName(),
                                    workflowId,
                                    createdEvent.getWorkflowId());
                        }

                        return null;
                    });
                } catch (Throwable t) {
                    log.error("Unhandled exception while handling the unhandled exception in the workflow", t);
                    throw t;
                }
                break;
        }

        if (workflowProcessingListener != null) {
            workflowProcessingListener.onEventProcessed(workflowId, result.eventId, result.eventData, result.type);
        }

        return result;
    }

    /**
     * Decides what to do with an event whose processing failed with a concurrency-related exception.
     * <p>
     * While {@code eventAttempt} does not exceed {@code concurrencyFailureRetryCount} the event is retried after
     * a random pause picked from {@code [0, concurrencyFailureMaxTimeout)}; after that the event is crashed.
     *
     * @param workflow     the workflow the event belongs to (currently not used by the decision)
     * @param eventId      id of the failed event
     * @param eventAttempt attempt counter of the failed event
     * @param eventData    payload of the failed event
     * @param entityType   entity type reported in the result
     * @param cex          the exception that caused the failure
     * @return a retry result with a randomized sleep duration, or a crash result once the retry budget is spent
     */
    private EventProcessingResult handleConcurrencyFailure(Workflow workflow, Long eventId, Integer eventAttempt,
                                                           Message eventData, String entityType,
                                                           Exception cex) {
        if (eventAttempt > concurrencyFailureRetryCount) {
            // TODO (mbobrov): think of a separate supervisor for this type of crash
            return EventProcessingResult.ofCrash(eventId, entityType, eventData, cex);
        }
        long maxSleepMillis = concurrencyFailureMaxTimeout.toMillis();
        // ThreadLocalRandom.nextLong(bound) throws IllegalArgumentException for a non-positive bound; a zero
        // (or sub-millisecond) max timeout must mean "retry immediately" instead of failing the error handling.
        long sleepMillis = maxSleepMillis > 0 ? ThreadLocalRandom.current().nextLong(maxSleepMillis) : 0L;
        return EventProcessingResult.ofRetry(eventId, entityType, eventData, cex, Duration.ofMillis(sleepMillis));
    }

    /**
     * Dispatches one event to the handler matched for the given workflow and event payload.
     * <p>
     * A messaging context is created for the workflow (parameterised with the event's tried count) and passed
     * to the handler; the events scheduled on that context are returned to the caller.
     *
     * @param workflow the workflow the event is addressed to
     * @param event    the event to handle
     * @return the events scheduled on the messaging context during handling
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private HandleWorkflowEventResult handleWorkflowEvent(Workflow workflow, WorkflowEvent event) {
        // Resolve the handler; a missing handler is reported with the proto descriptor name of the payload.
        Option<WorkflowEventHandler> handlerLookup =
                workflowEventHandlerMatcher.findEventHandlerFor(workflow, event.getData());
        String handlerMissingMessage = StringUtils.format("Event handler for event {} not found",
                event.getData().getDescriptorForType().getName());
        WorkflowEventHandler eventHandler = handlerLookup.getOrThrow(handlerMissingMessage);

        // Build the messaging context the handler will use.
        Option<MessagingContext> contextLookup =
                messagingContextFactory.createMessagingContextFor(workflow, event.getTimesTried());
        String contextMissingMessage = StringUtils.format(
                "Couldn't create persistent messaging context for workflow {}", workflow.getId());
        MessagingContext messagingContext = contextLookup.getOrThrow(contextMissingMessage);

        eventHandler.handleEvent(event.getData(), messagingContext);
        return new HandleWorkflowEventResult(messagingContext.getScheduledEvents());
    }

    /**
     * Looks up workflows that have pending work and schedules draining of their event queues.
     * <p>
     * Does nothing while {@code pausableSemaphore} is inactive. Otherwise the currently available bucket permits
     * and a snapshot of the locked workflow ids are passed to the fetcher inside a transaction, and every
     * returned (workflow id, pool id) pair is handed to {@code scheduleDrainEventQueue}. The elapsed time is
     * recorded into the success or failure timer; exceptions are logged and not propagated.
     */
    private void schedulePendingWorkflows() {
        if (!pausableSemaphore.isActive()) {
            log.debug("Skipped scheduling task as we are not running");
            return;
        }
        long startedAtNanos = System.nanoTime();
        try {
            Map<Integer, Integer> availablePermits = pausableSemaphore.getAvailableBucketPermits();
            log.debug("Querying for workflows with unprocessed events. Pool limits {}", availablePermits);
            // Snapshot of the locked ids so the fetcher sees a stable exclusion set.
            Set<UUID> lockedSnapshot = Set.copyOf(lockedWorkflows);
            Map<UUID, Integer> pendingByWorkflow = transactionOperation.execute(txStatus ->
                    pendingWorkflowsFetcher.fetchPendingWorkflows(
                            defaultProcessingPoolId, lockedSnapshot, availablePermits));
            for (Map.Entry<UUID, Integer> pending : pendingByWorkflow.entrySet()) {
                log.debug("Pacing workflow with id {}, pool {}", pending.getKey(), pending.getValue());
                scheduleDrainEventQueue(pending.getKey(), pending.getValue());
            }
            long elapsedNanos = System.nanoTime() - startedAtNanos;
            workflowsSchedulingTimerSuccess.record(elapsedNanos, TimeUnit.NANOSECONDS);
        } catch (Exception e) {
            log.error("Error occurred pacing workflows", e);
            long elapsedNanos = System.nanoTime() - startedAtNanos;
            workflowsSchedulingTimerFailure.record(elapsedNanos, TimeUnit.NANOSECONDS);
        }
    }

    /**
     * Immutable holder for the workflow/message pairs produced while handling one event.
     * Field modifiers are omitted because {@code @Value} already makes every field private and final.
     */
    @Value
    private static class HandleWorkflowEventResult {
        /** Messages scheduled by the handler, each paired with its workflow. */
        List<WorkflowMessagePair> messagePairs;
    }

    /**
     * Immutable (workflow id, processing pool id) pair describing a workflow to be notified.
     * Field modifiers are omitted because {@code @Value} already makes every field private and final.
     */
    @Value
    private static class WorkflowToNotify {
        /** Id of the workflow to notify. */
        UUID workflowId;
        /** Id of the processing pool associated with that workflow. */
        int processingPoolId;
    }

    /**
     * Outcome of one attempt to process a workflow event.
     * <p>
     * Instances are created through the {@code of*} factories below; each factory fills only the fields that
     * are meaningful for its result type and leaves the rest {@code null} (or an empty list).
     */
    @RequiredArgsConstructor
    private static class EventProcessingResult {
        /** Kind of outcome. */
        private final EventProcessingResultType type;
        /** Id of the processed event; {@code null} when no event was processed. */
        private final Long eventId;
        /** Entity type associated with the event; {@code null} when no event was processed. */
        private final String entityType;
        /** Payload of the processed event; {@code null} when no event was processed. */
        private final Message eventData;
        /** Workflows to notify; the factories leave it empty for everything except success. */
        private final List<WorkflowToNotify> workflowsToNotify;
        /** Failure cause; the factories set it only for retry and crash results. */
        private final Exception exception;
        /** Pause before the next attempt; the factories set it only for retry results. */
        private final Duration sleepDuration;

        /** Result for the case when there was no event to process. */
        static EventProcessingResult ofNoEvent() {
            return withoutEvent(EventProcessingResultType.NO_EVENT);
        }

        /** Result for the case when processing is not running. */
        static EventProcessingResult ofNotRunning() {
            return withoutEvent(EventProcessingResultType.NOT_RUNNING);
        }

        /** Result for a successfully processed event together with the workflows to notify. */
        static EventProcessingResult ofSuccess(Long eventId, String entityType, Message eventData,
                                               List<WorkflowToNotify> workflowsToNotify) {
            return new EventProcessingResult(EventProcessingResultType.SUCCESS, eventId, entityType, eventData,
                    workflowsToNotify, null, null);
        }

        /** Result for an event that failed with {@code ex} and should be retried after {@code sleepDuration}. */
        static EventProcessingResult ofRetry(Long eventId, String entityType, Message eventData, Exception ex,
                                             Duration sleepDuration) {
            return failed(EventProcessingResultType.RETRY, eventId, entityType, eventData, ex, sleepDuration);
        }

        /** Result for an event that failed with {@code ex} and must not be retried. */
        static EventProcessingResult ofCrash(Long eventId, String entityType,
                                             Message eventData, Exception ex) {
            return failed(EventProcessingResultType.CRASH, eventId, entityType, eventData, ex, null);
        }

        /** Builds a result that carries no event information at all. */
        private static EventProcessingResult withoutEvent(EventProcessingResultType type) {
            return new EventProcessingResult(type, null, null, null, Collections.emptyList(), null, null);
        }

        /** Builds a failure result: no workflows to notify, the given cause and optional sleep duration. */
        private static EventProcessingResult failed(EventProcessingResultType type, Long eventId,
                                                    String entityType, Message eventData, Exception cause,
                                                    Duration sleepDuration) {
            return new EventProcessingResult(type, eventId, entityType, eventData,
                    Collections.emptyList(), cause, sleepDuration);
        }
    }

    /**
     * Value-type key combining entity type, processing outcome and message type.
     * Its three components are the tag values read by {@code EventProcessingMeter}.
     * Field modifiers are omitted because {@code @Value} already makes every field private and final.
     */
    @Value
    private static class EventProcessingMeterKey {
        /** Value of the {@code entity_type} tag. */
        String entityType;
        /** Processing outcome; its tag value feeds the {@code outcome} tag. */
        EventProcessingResultType outcome;
        /** Value of the {@code message_type} tag. */
        String messageType;
    }

    /**
     * Value-type key combining entity type, outcome, message type and an additional Solomon tag.
     * Its four components are the tag values read by {@code EventProcessingTaggedMeter}.
     * Field modifiers are omitted because {@code @Value} already makes every field private and final.
     */
    @Value
    private static class EventProcessingTaggedMeterKey {
        /** Value of the {@code entity_type} tag. */
        String entityType;
        /** Processing outcome; its tag value feeds the {@code outcome} tag. */
        EventProcessingResultType outcome;
        /** Value of the {@code message_type} tag. */
        String messageType;
        /** Value of the {@code solomon_tag} tag. */
        String solomonTag;
    }

    /**
     * Pair of Micrometer meters for processed events: a counter and a processing-time timer.
     * Both carry the {@code entity_type}, {@code outcome} and {@code message_type} tags taken from the key
     * and are registered in {@link Metrics#globalRegistry}.
     */
    private static class EventProcessingMeter {
        /** Counter registered as {@code workflow.events.processedCount}. */
        final Counter counter;
        /** Timer registered as {@code workflow.events.processingTime}. */
        final Timer executeTimer;

        /**
         * Registers both meters using the tag values from {@code key}.
         *
         * @param key source of the tag values
         */
        EventProcessingMeter(EventProcessingMeterKey key) {
            String entityTypeTag = key.getEntityType();
            String outcomeTag = key.getOutcome().getTagValue();
            String messageTypeTag = key.getMessageType();

            this.counter = Counter.builder("workflow.events.processedCount")
                    .tag("entity_type", entityTypeTag)
                    .tag("outcome", outcomeTag)
                    .tag("message_type", messageTypeTag)
                    .register(Metrics.globalRegistry);

            // The timer additionally publishes a percentile histogram, SLO buckets and higher percentiles.
            this.executeTimer = Timer.builder("workflow.events.processingTime")
                    .tag("entity_type", entityTypeTag)
                    .tag("outcome", outcomeTag)
                    .tag("message_type", messageTypeTag)
                    .publishPercentileHistogram(true)
                    .serviceLevelObjectives(WORKFLOW_MESSAGE_PROCESSING_SLA)
                    .publishPercentiles(MetricsUtils.higherPercentiles())
                    .register(Metrics.globalRegistry);
        }
    }

    /**
     * Micrometer counter of processed events that carries an extra {@code solomon_tag} tag in addition to
     * {@code entity_type}, {@code outcome} and {@code message_type}; registered in
     * {@link Metrics#globalRegistry}.
     */
    private static class EventProcessingTaggedMeter {
        /** Counter registered as {@code workflow.events.processedTaggedCount}. */
        final Counter counter;

        /**
         * Registers the counter using the tag values from {@code key}.
         *
         * @param key source of the tag values
         */
        EventProcessingTaggedMeter(EventProcessingTaggedMeterKey key) {
            String entityTypeTag = key.getEntityType();
            String outcomeTag = key.getOutcome().getTagValue();
            String messageTypeTag = key.getMessageType();
            String solomonTagValue = key.getSolomonTag();

            this.counter = Counter.builder("workflow.events.processedTaggedCount")
                    .tag("entity_type", entityTypeTag)
                    .tag("outcome", outcomeTag)
                    .tag("message_type", messageTypeTag)
                    .tag("solomon_tag", solomonTagValue)
                    .register(Metrics.globalRegistry);
        }
    }

    /**
     * Immutable holder of two integer-keyed maps of atomic counters: one for in-process workflows and one for
     * pending workflows.
     * NOTE(review): the map keys are presumably processing pool ids - confirm against the code that builds it.
     * Field modifiers are omitted because {@code @Value} already makes every field private and final.
     */
    @Value
    private static class ProcessingMetrics {
        /** Counters for workflows that are currently in process. */
        ImmutableMap<Integer, AtomicInteger> inProcessWorkflowsMetric;
        /** Counters for workflows that are pending. */
        ImmutableMap<Integer, AtomicInteger> pendingWorkflowsMetric;
    }
}
