package ru.yandex.infra.stage;

import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Consumer;

import com.google.common.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ru.yandex.bolts.collection.Try;
import ru.yandex.infra.controller.RepeatedTask;
import ru.yandex.infra.controller.concurrent.LeaderService;
import ru.yandex.infra.controller.dto.SchemaMeta;
import ru.yandex.infra.controller.dto.StageMeta;
import ru.yandex.infra.controller.metrics.GaugeRegistry;
import ru.yandex.infra.controller.metrics.GolovanableGauge;
import ru.yandex.infra.controller.metrics.NamespacedGaugeRegistry;
import ru.yandex.infra.controller.util.ExitUtils;
import ru.yandex.infra.controller.yp.YpObject;
import ru.yandex.infra.controller.yp.YpObjectSettings;
import ru.yandex.infra.controller.yp.YpObjectTransactionalRepository;
import ru.yandex.infra.controller.yp.YpObjectsCache;
import ru.yandex.infra.stage.concurrent.SerialExecutor;
import ru.yandex.infra.stage.yp.ObjectLifeCycleManager;
import ru.yandex.yp.client.api.TProjectSpec;
import ru.yandex.yp.client.api.TProjectStatus;
import ru.yandex.yp.client.api.TStageSpec;
import ru.yandex.yp.client.api.TStageStatus;
import ru.yandex.yp.model.YpObjectType;

import static ru.yandex.infra.controller.util.YpUtils.CommonSelectors.SPEC_META;
import static ru.yandex.infra.controller.util.YpUtils.CommonSelectors.SPEC_STATUS_META_LABELS;

/**
 * Runs the periodic main loop of the stage controller.
 *
 * <p>Each iteration loads stages and projects from YP, hands them to the {@link RootController},
 * waits for the tasks queued in the {@link SerialExecutor}, processes polling results and GC of
 * stage child objects, and finally asks the root controller to update statuses and statistics.
 * Per-iteration timings and cumulative error counters are exported through the supplied
 * {@link GaugeRegistry} under the {@code engine} namespace.
 */
public class Engine {
    private static final Logger LOG = LoggerFactory.getLogger(Engine.class);

    static final String METRIC_YP_OBJECTS_LOAD_ERRORS_COUNT = "yp_objects_load_errors";
    static final String METRIC_YP_OBJECTS_LOAD_TIME = "yp_objects_load_time_ms";
    static final String METRIC_YP_STAGE_IDS_LOAD_TIME = "yp_stage_ids_load_time_ms";
    static final String METRIC_LAST_STAGES_PROCESSING_TIME = "last_stages_processing_time_ms";
    static final String METRIC_LAST_CHILD_OBJECTS_PROCESSING_TIME = "last_child_objects_processing_time_ms";
    static final String METRIC_LAST_CHILD_OBJECTS_UPDATE_TIME = "last_child_objects_update_time_ms";
    static final String METRIC_LAST_CHILD_OBJECTS_GC_TIME = "last_child_objects_gc_time_ms";
    static final String METRIC_CHILD_OBJECTS_GC_ERRORS_COUNT = "child_objects_gc_errors";
    static final String METRIC_SERIAL_EXECUTOR_TASKS_WAIT_TIMEOUTS = "serial_executor_tasks_wait_timeouts";
    static final String METRIC_LAST_RESOLVERS_WAIT_TIME = "last_resolvers_wait_time_ms";

    // Cumulative counters; they are never reset between iterations.
    private final AtomicLong metricYpObjectLoadErrorsCount = new AtomicLong();
    private final AtomicLong metricChildObjectsGcErrorsCount = new AtomicLong();
    private final AtomicLong metricSerialExecutorTasksWaitTimeoutsCount = new AtomicLong();

    // Gauges read lastIterationMetrics; the running iteration writes to currentIterationMetrics.
    // The two are rotated at the beginning of every mainLoop() call.
    private volatile Metrics lastIterationMetrics = new Metrics();
    private volatile Metrics currentIterationMetrics = new Metrics();

    /**
     * Timings of a single main loop iteration, in milliseconds.
     * A field stays {@code null} until the corresponding phase has been measured.
     */
    static class Metrics {
        Long ypObjectsLoadTimeMilliseconds;
        Long ypStageIdsLoadTimeMilliseconds;
        Long stagesProcessingTimeMilliseconds;
        Long childObjectsProcessingTimeMilliseconds;
        Long childObjectsUpdateTimeMilliseconds;
        Long childObjectsGcTimeMilliseconds;
        Long resolversWaitTimeMilliseconds;
    }

    private final YpObjectTransactionalRepository<StageMeta, TStageSpec, TStageStatus> stageRepository;
    private final YpObjectsCache<StageMeta, TStageSpec, TStageStatus> stagesCache;
    private final YpObjectsCache<SchemaMeta, TProjectSpec, TProjectStatus> projectsCache;
    private final SerialExecutor serialExecutor;
    private final ScheduledExecutorService executor;
    private final LeaderService leaderService;
    private final Set<ObjectLifeCycleManager> listObjectRepositories;
    private final RootController rootController;
    private final RepeatedTask mainLoop;
    private final Duration externalResourcesTimeout;
    private final Duration ypObjectsUpdateTimeout;
    private final Duration updateInterval;

    /**
     * Creates the engine, its YP object caches and gauges. The main loop is not started here;
     * call {@link #start()} for that.
     *
     * @param stageRepository          repository used to generate timestamps, list stage ids and
     *                                 back the stages cache
     * @param projectRepository        repository backing the projects cache
     * @param updateInterval           interval handed to the repeated main loop task; a zero
     *                                 duration makes {@link #start()} a no-op
     * @param mainLoopTimeout          timeout handed to the repeated main loop task
     * @param externalResourcesTimeout how long one iteration waits for the serial executor tasks
     *                                 after stages have been synced
     * @param ypObjectsUpdateTimeout   how long one iteration waits for the serial executor tasks
     *                                 after child objects GC has been started
     * @param rootController           controller that receives the loaded stages and projects
     * @param serialExecutor           executor whose underlying scheduled executor runs the loop
     * @param leaderService            leadership lock holder, checked at the start of every iteration
     * @param listObjectRepositories   life cycle managers of stage child objects
     * @param registry                 parent registry for the caches' and engine's gauges
     * @param ypObjectsCacheSettings   per object type settings for the YP object caches
     */
    public Engine(YpObjectTransactionalRepository<StageMeta, TStageSpec, TStageStatus> stageRepository,
                  YpObjectTransactionalRepository<SchemaMeta, TProjectSpec, TProjectStatus> projectRepository,
                  Duration updateInterval,
                  Duration mainLoopTimeout,
                  Duration externalResourcesTimeout,
                  Duration ypObjectsUpdateTimeout,
                  RootController rootController,
                  SerialExecutor serialExecutor,
                  LeaderService leaderService,
                  Set<ObjectLifeCycleManager> listObjectRepositories,
                  GaugeRegistry registry,
                  Map<YpObjectType, YpObjectSettings> ypObjectsCacheSettings) {
        this.stageRepository = stageRepository;
        this.updateInterval = updateInterval;
        this.externalResourcesTimeout = externalResourcesTimeout;
        this.ypObjectsUpdateTimeout = ypObjectsUpdateTimeout;
        this.rootController = rootController;
        this.serialExecutor = serialExecutor;
        this.executor = serialExecutor.getExecutor();
        this.leaderService = leaderService;
        this.listObjectRepositories = listObjectRepositories;

        stagesCache = new YpObjectsCache<>(stageRepository,
                YpObjectSettings.getSettingsForType(ypObjectsCacheSettings, YpObjectType.STAGE),
                new NamespacedGaugeRegistry(registry, "stages"),
                SPEC_STATUS_META_LABELS);
        projectsCache = new YpObjectsCache<>(projectRepository,
                YpObjectSettings.getSettingsForType(ypObjectsCacheSettings, YpObjectType.PROJECT),
                new NamespacedGaugeRegistry(registry, "projects"),
                SPEC_META);

        // Dedicated local instead of reassigning the constructor parameter.
        GaugeRegistry engineRegistry = new NamespacedGaugeRegistry(registry, "engine");

        engineRegistry.add(METRIC_YP_OBJECTS_LOAD_ERRORS_COUNT,
                new GolovanableGauge<>(metricYpObjectLoadErrorsCount::get, "dmmm"));
        engineRegistry.add(METRIC_YP_OBJECTS_LOAD_TIME,
                new GolovanableGauge<>(() -> lastIterationMetrics.ypObjectsLoadTimeMilliseconds, "axxx"));
        engineRegistry.add(METRIC_YP_STAGE_IDS_LOAD_TIME,
                new GolovanableGauge<>(() -> lastIterationMetrics.ypStageIdsLoadTimeMilliseconds, "axxx"));
        engineRegistry.add(METRIC_LAST_STAGES_PROCESSING_TIME,
                new GolovanableGauge<>(() -> lastIterationMetrics.stagesProcessingTimeMilliseconds, "axxx"));
        engineRegistry.add(METRIC_LAST_CHILD_OBJECTS_PROCESSING_TIME,
                new GolovanableGauge<>(() -> lastIterationMetrics.childObjectsProcessingTimeMilliseconds, "axxx"));
        engineRegistry.add(METRIC_LAST_CHILD_OBJECTS_UPDATE_TIME,
                new GolovanableGauge<>(() -> lastIterationMetrics.childObjectsUpdateTimeMilliseconds, "axxx"));
        engineRegistry.add(METRIC_LAST_CHILD_OBJECTS_GC_TIME,
                new GolovanableGauge<>(() -> lastIterationMetrics.childObjectsGcTimeMilliseconds, "axxx"));
        engineRegistry.add(METRIC_CHILD_OBJECTS_GC_ERRORS_COUNT,
                new GolovanableGauge<>(metricChildObjectsGcErrorsCount::get, "dmmm"));
        engineRegistry.add(METRIC_LAST_RESOLVERS_WAIT_TIME,
                new GolovanableGauge<>(() -> lastIterationMetrics.resolversWaitTimeMilliseconds, "axxx"));
        engineRegistry.add(METRIC_SERIAL_EXECUTOR_TASKS_WAIT_TIMEOUTS,
                new GolovanableGauge<>(metricSerialExecutorTasksWaitTimeoutsCount::get, "dmmm"));

        mainLoop = new RepeatedTask(this::mainLoop, updateInterval, mainLoopTimeout, executor,
                Optional.of(engineRegistry), LOG, true);
    }

    /**
     * Starts the repeated main loop task. Does nothing when the configured update interval is zero.
     */
    public void start() {
        if (!updateInterval.isZero()) {
            mainLoop.start();
        }
    }

    /**
     * Performs one iteration of the main loop.
     *
     * <p>Performed in serialExecutor thread.
     *
     * @return future that completes when the whole iteration (including status update) is done
     */
    @VisibleForTesting
    CompletableFuture<?> mainLoop() {

        if (!leaderService.isLeader()) {
            LOG.error("Leadership lock lost, performing forced shutdown");
            ExitUtils.gracefulExit(ExitUtils.LEADERSHIP_LOST);
        }

        // Publish timings of the previous iteration and start collecting fresh ones.
        lastIterationMetrics = currentIterationMetrics;
        currentIterationMetrics = new Metrics();
        rootController.beginStatisticsCollection();

        //Start loading of stage child objects in advance.
        //We will process results later. All stages should be synced before processing child objects.
        listObjectRepositories.forEach(ObjectLifeCycleManager::startPolling);

        return loadYpObjects()
                .thenAcceptAsync(this::syncStages, executor)
                .thenCompose(x -> waitForSerialExecutor("Docker/Sandbox resolver requests",
                        externalResourcesTimeout, t -> currentIterationMetrics.resolversWaitTimeMilliseconds = t))
                .thenComposeAsync(x -> processStageChildObjectsPollingResults(), executor)
                .thenRunAsync(this::processStageChildObjectsGC, executor)
                .thenCompose(x -> waitForSerialExecutor("Stage child objects updates",
                        ypObjectsUpdateTimeout, t -> currentIterationMetrics.childObjectsUpdateTimeMilliseconds = t))
                .thenRunAsync(() -> {
                    if (!leaderService.isProcessingAllowed()) {
                        leaderService.allowProcessing();
                    }
                    rootController.updateStatuses();
                    LOG.info("Stage status sender queue size: {}", serialExecutor.getAllSubmittedFutures().size());
                    rootController.buildStatistics();
                }, executor);
    }

    /**
     * Generates a YP timestamp and loads, at that timestamp, all stage ids, the cached stages and
     * the cached projects.
     *
     * <p>Both load-time metrics are measured from the moment this method is called, so the stage
     * ids load time also includes timestamp generation. A failure of the returned future
     * increments the YP objects load errors counter.
     *
     * @return future with the loaded objects
     */
    private CompletableFuture<YpObjects> loadYpObjects() {
        long startNanos = System.nanoTime();
        return stageRepository.generateTimestamp()
                .thenCompose(timestamp -> {
                    var ypObjects = new YpObjects();
                    ypObjects.timestamp = timestamp;
                    return CompletableFuture.allOf(
                            stageRepository.listAllIds(timestamp)
                                    .whenComplete((x, error) ->
                                            currentIterationMetrics.ypStageIdsLoadTimeMilliseconds = elapsedMillisSince(startNanos))
                                    .thenAccept(result -> ypObjects.allStageIds = result),
                            stagesCache.selectObjects(Optional.of(timestamp))
                                    .thenAccept(result -> ypObjects.stagesWithDeployEngine = result),
                            projectsCache.selectObjects(Optional.of(timestamp))
                                    .thenAccept(result -> ypObjects.projects = result)
                            ).thenApply(x -> ypObjects);
                })
                .whenComplete((result, error) -> {
                    currentIterationMetrics.ypObjectsLoadTimeMilliseconds = elapsedMillisSince(startNanos);
                    if (error != null) {
                        metricYpObjectLoadErrorsCount.incrementAndGet();
                    }
                });
    }

    /**
     * Passes the loaded objects to the root controller: first GC for removed stages, then sync of
     * stages and projects. Records the time spent in these two calls.
     *
     * @param ypObjects objects loaded by {@link #loadYpObjects()}
     */
    private void syncStages(YpObjects ypObjects) {
        LOG.info("SerialExecutor queue size before stages sync: {}", serialExecutor.getAllSubmittedFutures().size());
        LOG.info("Loaded {} of {} stage ids; {} projects in {} ms",
                ypObjects.stagesWithDeployEngine.size(),
                ypObjects.allStageIds.size(),
                ypObjects.projects.size(),
                currentIterationMetrics.ypObjectsLoadTimeMilliseconds);

        if (ypObjects.stagesWithDeployEngine.isEmpty()) {
            LOG.warn("List of stages is empty");
        }

        long startNanos = System.nanoTime();

        rootController.processGcForRemovedStages(ypObjects.stagesWithDeployEngine.keySet(), ypObjects.allStageIds);
        rootController.sync(ypObjects.stagesWithDeployEngine, ypObjects.projects);
        currentIterationMetrics.stagesProcessingTimeMilliseconds = elapsedMillisSince(startNanos);
    }

    /**
     * Asks every child object life cycle manager to process its polling results.
     *
     * @return future that completes normally once all managers are done, even if some of them failed
     */
    private CompletableFuture<?> processStageChildObjectsPollingResults() {
        long startNanos = System.nanoTime();
        CompletableFuture<?>[] futures = listObjectRepositories.stream()
                .map(ObjectLifeCycleManager::processPollingResults)
                .toArray(CompletableFuture<?>[]::new);
        return CompletableFuture.allOf(futures)
                //Don't stop if any cluster failed to poll
                //Exception is already logged inside ObjectLifeCycleManagerImpl
                .exceptionally(error -> null)
                .whenComplete((result, error) ->
                        currentIterationMetrics.childObjectsProcessingTimeMilliseconds = elapsedMillisSince(startNanos));
    }

    /**
     * Starts a GC cycle in every child object life cycle manager. A failure in one manager is
     * counted and logged and does not prevent the remaining managers from being started.
     */
    private void processStageChildObjectsGC() {
        long startNanos = System.nanoTime();
        for (var childObjectsLifeCycleManager : listObjectRepositories) {
            try {
                childObjectsLifeCycleManager.startGcCycle();
            } catch (Exception exception) {
                metricChildObjectsGcErrorsCount.incrementAndGet();
                LOG.error("Stage child objects GC failed: {}", childObjectsLifeCycleManager, exception);
            }
        }

        currentIterationMetrics.childObjectsGcTimeMilliseconds = elapsedMillisSince(startNanos);
    }

    /**
     * Waits, up to {@code timeout}, for all futures currently submitted to the serial executor.
     *
     * <p>Failures of the awaited tasks and the timeout itself are swallowed here; a timeout is
     * additionally counted and logged together with the number of tasks still running.
     *
     * @param logMessage           prefix used when logging the queue size
     * @param timeout              maximum time to wait
     * @param metricUpdateConsumer receives the elapsed wait time in milliseconds
     * @return future that completes when the wait is over
     */
    private CompletableFuture<?> waitForSerialExecutor(String logMessage, Duration timeout, Consumer<Long> metricUpdateConsumer) {
        long startNanos = System.nanoTime();
        List<CompletableFuture<?>> submittedFutures = serialExecutor.getAllSubmittedFutures();
        LOG.info("{} queue size: {}", logMessage, submittedFutures.size());
        return CompletableFuture.allOf(submittedFutures.toArray(CompletableFuture<?>[]::new))
                .orTimeout(timeout.toNanos(), TimeUnit.NANOSECONDS)
                //Errors are already logged in SerialExecutor callbacks
                .exceptionally(error -> {
                    if (error instanceof TimeoutException) {
                        metricSerialExecutorTasksWaitTimeoutsCount.incrementAndGet();
                        LOG.warn("Failed to wait for completion of all SerialExecutor tasks (Timeout expired: {}). {} of {} tasks are still running.",
                                timeout, submittedFutures.stream().filter(future -> !future.isDone()).count(), submittedFutures.size());
                    }
                    return null;
                })
                .whenComplete((result, error) -> metricUpdateConsumer.accept(elapsedMillisSince(startNanos)));
    }

    /**
     * Returns the milliseconds elapsed since {@code startNanos}, a value previously obtained from
     * {@link System#nanoTime()}. The monotonic clock is used so that wall-clock adjustments
     * cannot produce negative or inflated durations.
     */
    private static long elapsedMillisSince(long startNanos) {
        return TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos);
    }

    /** Objects loaded from YP for one main loop iteration, all read at the same timestamp. */
    static class YpObjects {
        long timestamp;
        Set<String> allStageIds;//All stages on cluster ignoring any labels
        Map<String, Try<YpObject<StageMeta, TStageSpec, TStageStatus>>> stagesWithDeployEngine;//Only deploy stages with labels "deploy_engine" = ...
        Map<String, Try<YpObject<SchemaMeta, TProjectSpec, TProjectStatus>>> projects;
    }
}
