package ru.yandex.solomon.coremon.meta.ttl;

import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import javax.annotation.Nullable;

import com.google.common.base.Throwables;
import com.google.protobuf.UnsafeByteOperations;
import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Import;
import org.springframework.stereotype.Component;

import ru.yandex.misc.thread.ThreadLocalTimeout;
import ru.yandex.monlib.metrics.labels.Labels;
import ru.yandex.monlib.metrics.registry.MetricRegistry;
import ru.yandex.solomon.codec.serializer.OwnerField;
import ru.yandex.solomon.config.protobuf.coremon.TCoremonMetabaseConfig;
import ru.yandex.solomon.config.thread.ThreadPoolProvider;
import ru.yandex.solomon.core.conf.ShardKeyAndId;
import ru.yandex.solomon.core.conf.SolomonConfWithContext;
import ru.yandex.solomon.core.conf.watch.SolomonConfListener;
import ru.yandex.solomon.core.db.model.ShardState;
import ru.yandex.solomon.coremon.meta.db.MetabaseShardStorage;
import ru.yandex.solomon.coremon.meta.db.MetricsDao;
import ru.yandex.solomon.coremon.meta.db.MetricsDaoFactory;
import ru.yandex.solomon.coremon.meta.db.ydb.YdbMetricsDaoContext;
import ru.yandex.solomon.coremon.meta.service.MetabaseShard;
import ru.yandex.solomon.coremon.meta.service.MetabaseShardResolver;
import ru.yandex.solomon.coremon.meta.service.ShardLocator;
import ru.yandex.solomon.coremon.meta.service.cloud.EmptyResourceFinder;
import ru.yandex.solomon.coremon.meta.service.cloud.ResourceFinder;
import ru.yandex.solomon.coremon.meta.service.cloud.ResourceMap;
import ru.yandex.solomon.coremon.meta.ttl.Batcher.DeleteBatch;
import ru.yandex.solomon.coremon.meta.ttl.Batcher.LoadResourceBatch;
import ru.yandex.solomon.coremon.meta.ttl.tasks.FinishedTask;
import ru.yandex.solomon.coremon.meta.ttl.tasks.RunningTask;
import ru.yandex.solomon.coremon.meta.ttl.tasks.Task;
import ru.yandex.solomon.coremon.meta.ttl.tasks.TaskStats;
import ru.yandex.solomon.staffOnly.annotations.LinkedOnRootPage;
import ru.yandex.solomon.staffOnly.annotations.ManagerMethod;
import ru.yandex.solomon.staffOnly.annotations.ManagerMethodArgument;
import ru.yandex.solomon.staffOnly.html.HtmlWriterWithCommonLibraries;
import ru.yandex.solomon.staffOnly.manager.ExtraContentParam;
import ru.yandex.solomon.staffOnly.manager.special.ExtraContent;
import ru.yandex.stockpile.api.EProjectId;
import ru.yandex.stockpile.api.EStockpileStatusCode;
import ru.yandex.stockpile.api.MetricMeta;
import ru.yandex.stockpile.api.ReadMetricsMetaRequest;
import ru.yandex.stockpile.api.TWriteDataBinaryRequest;
import ru.yandex.stockpile.client.StockpileClient;
import ru.yandex.stockpile.client.writeRequest.StockpileShardWriteRequestBuilder;


/**
 * @author Sergey Polovko
 */
@Import({
    YdbMetricsDaoContext.class,
})
@Component
@LinkedOnRootPage("Deletion Manager")
public class DeletionManager implements Deleter, MetaLoader, ResourceLoader, SolomonConfListener {
    private static final Logger logger = LoggerFactory.getLogger(DeletionManager.class);
    // period between two consecutive runs of the periodic deletion check
    private static final long SCHEDULE_PERIOD_MILLIS = TimeUnit.MINUTES.toMillis(1);
    // number of retries performed after the first failed attempt of an async call
    private static final int MAX_RETRIES = 5;

    private final MetabaseShardResolver<? extends MetabaseShard> shardResolver;
    private final StockpileClient stockpileClient;
    private final ResourceFinder resourceFinder;
    private final MetricsDaoFactory metricsDaoFactory;
    // runs deletion tasks and the periodic check itself
    private final ExecutorService executor;
    // triggers the periodic check and delayed retries
    private final ScheduledExecutorService scheduler;
    private final ShardLocator shardLocator;
    private final UnknownReferenceTracker unknownReferenceTracker;
    private final DeletionManagerMetrics metrics;

    // configs (each value is clamped to its allowed range in the constructor)
    private final int maxRunningTasksCount;
    private final int maxAsyncOperationsPerTask;
    private final Duration asyncOperationTimeout;

    // incremented when a deletion task is started, decremented when its future completes
    private final AtomicInteger runningTasksCount = new AtomicInteger();
    // last known task (running, finished or failed) keyed by shard numId
    final ConcurrentHashMap<Integer, Task> tasksByShardNumId = new ConcurrentHashMap<>();

    // most recent configuration received via onConfigurationLoad(); null until the first load
    @Nullable
    private volatile SolomonConfWithContext lastConf;

    /**
     * Wires collaborators, clamps the configured limits to their allowed ranges and
     * schedules the periodic deletion check.
     *
     * @param resourceFinder optional finder; an {@link EmptyResourceFinder} is used when absent
     * @param config         TTL deletion limits (task count, per-task parallelism, operation timeout)
     */
    @Autowired
    public DeletionManager(
        MetabaseShardResolver<? extends MetabaseShard> shardResolver,
        StockpileClient stockpileClient,
        Optional<ResourceFinder> resourceFinder,
        MetricsDaoFactory metricsDaoFactory,
        TCoremonMetabaseConfig.TTtlDeletionConfig config,
        ThreadPoolProvider threadPoolProvider,
        ShardLocator shardLocator,
        UnknownReferenceTracker unknownReferenceTracker,
        MetricRegistry registry)
    {
        // collaborators
        this.shardResolver = shardResolver;
        this.stockpileClient = stockpileClient;
        this.resourceFinder = resourceFinder.orElse(new EmptyResourceFinder());
        this.metricsDaoFactory = metricsDaoFactory;

        // thread pools
        final String poolName = config.getThreadPoolName();
        this.executor = threadPoolProvider.getExecutorService(
            poolName,
            "MetabaseConfig.TtlDeletionConfig.ThreadPoolName");
        this.scheduler = threadPoolProvider.getSchedulerExecutorService();

        this.shardLocator = shardLocator;
        this.unknownReferenceTracker = unknownReferenceTracker;
        this.metrics = new DeletionManagerMetrics(registry);

        // running tasks limit, kept within [1..50]
        final int requestedTasks = config.getMaxRunningTasksCount();
        this.maxRunningTasksCount = Math.max(1, Math.min(requestedTasks, 50));

        // async operations per task, kept within [1..16]
        final int requestedOperations = config.getMaxAsyncOperationsPerTask();
        this.maxAsyncOperationsPerTask = Math.max(1, Math.min(requestedOperations, 16));

        // async operation timeout in seconds, kept within [1..180]
        var requestedTimeoutSeconds = config.getAsyncOperationTimeoutSeconds();
        var timeoutSeconds = Math.max(1, Math.min(requestedTimeoutSeconds, 180));
        this.asyncOperationTimeout = Duration.standardSeconds(timeoutSeconds);

        // first run happens after 10 minutes plus a random part of one schedule period
        final long baseDelayMillis = TimeUnit.MINUTES.toMillis(10);
        final long jitterMillis = ThreadLocalRandom.current().nextLong(SCHEDULE_PERIOD_MILLIS);
        scheduler.scheduleAtFixedRate(
            this::startPeriodicDeletion,
            baseDelayMillis + jitterMillis,
            SCHEDULE_PERIOD_MILLIS,
            TimeUnit.MILLISECONDS);
    }

    /**
     * Entry point of the periodic check, invoked by the scheduler.
     *
     * <p>Does nothing until a configuration has been loaded or while more than
     * {@code maxRunningTasksCount} tasks are running. Otherwise hands the real work over
     * to {@code executor}. Nothing is allowed to escape from this method because it is
     * registered via {@code scheduleAtFixedRate}.
     */
    private void startPeriodicDeletion() {
        try {
            final SolomonConfWithContext conf = this.lastConf;
            if (conf == null) {
                logger.warn("periodic deletion cannot start because configuration is not yet loaded");
                return;
            }

            final int runningTasksCount = this.runningTasksCount.get();
            if (runningTasksCount > maxRunningTasksCount) {
                logger.info("skip periodic deletion because too many tasks already running: {}", runningTasksCount);
                return;
            }

            // do not block scheduler thread
            executor.execute(() -> {
                // The two steps are independent: a failure while starting deletion tasks
                // must not prevent the unknown reference metrics from being refreshed,
                // and neither failure may escape into the executor unnoticed.
                try {
                    runPeriodicTtlDeletionTasks(conf);
                } catch (Throwable t) {
                    logger.error("periodic ttl deletion tasks failed", t);
                }
                try {
                    runUpdateUnknownReferenceCount(conf);
                } catch (Throwable t) {
                    logger.error("unknown reference count update failed", t);
                }
            });
        } catch (Throwable t) {
            logger.error("periodic deletion failed", t);
        }
    }

    /**
     * Selects local shards that are due for TTL deletion and starts a deletion task for
     * as many of them as there are free task slots.
     *
     * <p>A shard is skipped when it is not local, has no metrics TTL, is read-only or
     * inactive, is younger than its TTL, already has a running task, finished a task
     * within the last day, had any task created within the last schedule period, or is
     * not loaded for at least one hour. Candidates are ordered by the creation time of
     * their previous task, so shards that were never processed go first.
     *
     * @param conf configuration snapshot used for the whole run
     */
    private void runPeriodicTtlDeletionTasks(SolomonConfWithContext conf) {
        // The counter may exceed the maximum (tasks can also be started manually), and
        // Stream.limit() rejects negative sizes, so bail out when no slot is free.
        final int freeSlots = maxRunningTasksCount - runningTasksCount.get();
        if (freeSlots <= 0) {
            return;
        }

        // snapshot of previous task creation times, used only for ordering candidates
        Object2LongOpenHashMap<Integer> lastTaskCreatedAt = new Object2LongOpenHashMap<>();
        lastTaskCreatedAt.defaultReturnValue(0);
        for (var e : tasksByShardNumId.entrySet()) {
            lastTaskCreatedAt.put(e.getKey(), e.getValue().getCreatedAtMillis());
        }

        final Instant now = Instant.now();
        final long oneDayBeforeMillis = now.minus(1, ChronoUnit.DAYS).toEpochMilli();
        final long oneSchedulePeriodBeforeMillis = now.minus(SCHEDULE_PERIOD_MILLIS, ChronoUnit.MILLIS).toEpochMilli();

        conf.getCorrectShardsStream()
            .flatMap(s -> {
                if (!shardLocator.isLocal(s.getNumId())) {
                    // skip non local shards
                    return Stream.empty();
                }
                if (s.getMetricsTtlDays() <= 0) {
                    // skip shards without metrics TTL configuration
                    return Stream.empty();
                }
                ShardState state = s.getRaw().getState();
                if (state == ShardState.READ_ONLY || state == ShardState.INACTIVE) {
                    // skip readonly and inactive shards
                    return Stream.empty();
                }
                if (now.minus(s.getMetricsTtlDays(), ChronoUnit.DAYS).isBefore(s.getCreatedAt())) {
                    // skip recently created shard
                    return Stream.empty();
                }

                Task task = tasksByShardNumId.get(s.getNumId());
                if (task != null) {
                    if (task instanceof RunningTask) {
                        // skip shards with still running task
                        return Stream.empty();
                    }
                    if (task instanceof FinishedTask) {
                        // skip shards already processed in the last 1 day
                        if (task.getCreatedAtMillis() >= oneDayBeforeMillis) {
                            return Stream.empty();
                        }
                    }
                    if (task.getCreatedAtMillis() > oneSchedulePeriodBeforeMillis) {
                        // do not retry more than once per schedule period
                        return Stream.empty();
                    }
                }

                try {
                    MetabaseShard shard = shardResolver.resolveShard(s.getNumId());
                    if (shard.isLoaded() && shard.getUptimeMillis() > TimeUnit.HOURS.toMillis(1)) {
                        // process only ready and mature shards
                        return Stream.of(shard);
                    }
                } catch (Exception e) {
                    // keep the cause, otherwise resolution problems are impossible to diagnose
                    logger.warn("cannot resolve shard {}", Integer.toUnsignedString(s.getNumId()), e);
                }

                return Stream.empty();
            })
            .sorted((s1, s2) -> {
                final long createdAt1 = lastTaskCreatedAt.getLong(s1.getNumId());
                final long createdAt2 = lastTaskCreatedAt.getLong(s2.getNumId());
                return Long.compare(createdAt1, createdAt2);
            })
            .limit(freeSlots)
            .forEach(s -> {
                // one broken shard must not prevent the remaining candidates from starting
                try {
                    int metricsTtlDays = conf.getShardByNumId(s.getNumId())
                        .getConfOrThrow()
                        .getMetricsTtlDays();
                    runDeletionTask(s, now.minus(metricsTtlDays, ChronoUnit.DAYS));
                } catch (RuntimeException e) {
                    logger.warn("cannot run periodic deletion for shard {}",
                        Integer.toUnsignedString(s.getNumId()), e);
                }
            });
    }

    /**
     * Sums the unknown reference counters of all known tasks per service provider
     * (plus a {@code "total"} entry) and publishes the result to the metrics.
     *
     * <p>Tasks of non-local shards, of shards missing from or incorrect in {@code conf},
     * and of shards without a service provider do not contribute.
     *
     * @param conf configuration used to map a shard to its service provider
     */
    private void runUpdateUnknownReferenceCount(SolomonConfWithContext conf) {
        var countsByProvider = new Object2LongOpenHashMap<String>();
        countsByProvider.defaultReturnValue(0);
        countsByProvider.put("total", 0);

        tasksByShardNumId.forEach((boxedNumId, task) -> {
            var taskStats = task.getStats();
            if (stats_isEmpty(taskStats.getUnknownReference())) {
                // nothing to report for this task
                return;
            }

            final int shardNumId = boxedNumId;
            if (!shardLocator.isLocal(shardNumId)) {
                return;
            }

            var shardConf = conf.getShardByNumIdOrNull(shardNumId);
            if (shardConf == null || !shardConf.isCorrect()) {
                return;
            }

            var provider = shardConf.getConfOrThrow().getService().getServiceProvider();
            if (provider != null) {
                int unknown = taskStats.getUnknownReference();
                countsByProvider.addTo(provider.getId(), unknown);
                countsByProvider.addTo("total", unknown);
            }
        });

        metrics.updateUnknownRef(countsByProvider);
    }

    /** Tells whether an unknown reference counter carries no information. */
    private static boolean stats_isEmpty(long unknownReferenceCount) {
        return unknownReferenceCount == 0;
    }

    /**
     * Registers and starts a TTL deletion task for the given shard, unless a running
     * task is already registered for it (in which case nothing happens).
     *
     * <p>On completion of the task future the result is recorded via
     * {@code onTaskComplete}, the running counter is decremented and the batcher is
     * closed. If the task cannot be started, the registration and the counter are rolled
     * back so that the shard does not look busy forever.
     *
     * @param shard      shard to clean up
     * @param expireTime point in time passed to the batcher as the expiration threshold
     * @throws RuntimeException if the task cannot be created or started (also logged here)
     */
    private void runDeletionTask(MetabaseShard shard, Instant expireTime) {
        final int numId = shard.getNumId();
        try {
            MetabaseShardStorage storage = shard.getStorage();
            Set<String> referenceLabels = Optional.ofNullable(lastConf)
                    .map(conf -> conf.getServiceProvider(shard.getServiceProvider()))
                    .map(serviceProvider -> serviceProvider.getReferences()
                            .stream()
                            .map(referenceConf -> referenceConf.label)
                            .collect(Collectors.toSet()))
                    .orElse(Set.of());

            var key = new ShardKeyAndId(shard.getShardKey(), shard.getId(), numId);
            Batcher batcher = new Batcher(
                    key,
                    storage.getFileMetrics(),
                    (int) expireTime.getEpochSecond(),
                    referenceLabels,
                    unknownReferenceTracker);

            // true once the completion callback owns the counter decrement and batcher.close()
            boolean handedOver = false;
            try {
                RunningTask task = new RunningTask(
                        shard.getShardKey().getProject(),
                        numId,
                        batcher, this, this, this,
                        executor, maxAsyncOperationsPerTask);
                Task newTask = tasksByShardNumId.compute(numId,
                    (id, prevTask) -> (prevTask instanceof RunningTask) ? prevTask : task);

                if (newTask != task) {
                    // another task is already running for this shard
                    return;
                }

                runningTasksCount.incrementAndGet();
                try {
                    logger.info("deletion for shard {}, expired before {}",
                        Integer.toUnsignedString(numId), expireTime);

                    CompletableFuture<Void> taskFuture = task.start()
                        .whenComplete((r, t) -> {
                            try {
                                onTaskComplete(numId, t, storage);
                            } finally {
                                // must happen even if recording the result fails
                                runningTasksCount.decrementAndGet();
                                batcher.close();
                            }
                        });
                    handedOver = true;

                    metrics.tasks.forFuture(taskFuture);
                } finally {
                    if (!handedOver) {
                        // start failed synchronously: undo registration and counter
                        tasksByShardNumId.remove(numId, task);
                        runningTasksCount.decrementAndGet();
                    }
                }
            } finally {
                if (!handedOver) {
                    batcher.close();
                }
            }
        } catch (Throwable t) {
            logger.error("cannot start deletion for shard {}", Integer.toUnsignedString(numId), t);
            throw Throwables.propagate(t);
        }
    }

    /**
     * Manager action: starts TTL deletion for one shard on demand.
     *
     * @param shardId shard id or unsigned numeric id
     * @return human readable outcome of the request
     */
    @ManagerMethod
    private String startDeletion(@ManagerMethodArgument(name = "shardIdOrNumId") String shardId) {
        final int shardNumId = numIdByString(shardId);
        if (tasksByShardNumId.get(shardNumId) instanceof RunningTask) {
            return "TTL task already running";
        }

        final int ttlDays = this.lastConf
            .getShardByNumId(shardNumId)
            .getConfOrThrow()
            .getMetricsTtlDays();
        if (ttlDays <= 0) {
            return "TTL not configured";
        }

        final MetabaseShard target = shardResolver.resolveShard(shardNumId);
        final Instant threshold = Instant.now().minus(ttlDays, ChronoUnit.DAYS);
        runDeletionTask(target, threshold);
        return "TTL task started, will delete metrics not updated since " + threshold;
    }

    /**
     * Manager action: stops TTL deletion for one shard.
     *
     * @param shardId shard id or unsigned numeric id
     */
    @ManagerMethod
    private void stopDeletion(@ManagerMethodArgument(name = "shardIdOrNumId") String shardId) {
        final int shardNumId = numIdByString(shardId);
        stopDeletion(shardNumId);
    }

    /**
     * Forgets the task registered for the shard and, if it was still running, stops it.
     *
     * @param numId numeric shard id
     */
    private void stopDeletion(int numId) {
        final Task removed = tasksByShardNumId.remove(numId);
        if (!(removed instanceof RunningTask)) {
            // nothing registered, or the task is not running anymore
            return;
        }

        final RunningTask running = (RunningTask) removed;
        logger.info("deletion for shard {} stopped", Integer.toUnsignedString(numId));
        running.stop();
    }

    /**
     * Records the outcome of a completed deletion task.
     *
     * <p>The running task registered for the shard is atomically replaced by its
     * finished (or failed) counterpart. If no running task is registered, or the entry
     * changed concurrently (for example the task was stopped), the map is left untouched.
     * After a successful run that deleted at least one metric the shard storage is
     * reloaded and the deleted metrics counter is updated.
     *
     * @param numId     numeric shard id
     * @param throwable failure of the task, or {@code null} when it completed normally
     * @param storage   storage of the shard, reloaded when metrics were deleted
     */
    private void onTaskComplete(int numId, @Nullable Throwable throwable, MetabaseShardStorage storage) {
        // NOTE(review): the completed task is identified by numId only, so a late
        // completion of a stopped task can still be attributed to a newer running task
        // of the same shard; fixing that needs the task instance to be passed in.
        final Task current = tasksByShardNumId.get(numId);
        if (!(current instanceof RunningTask)) {
            // stopped or already recorded: keep whatever is registered now
            return;
        }
        final RunningTask running = (RunningTask) current;

        if (throwable != null) {
            // swap atomically, so a concurrently registered task is never overwritten
            if (tasksByShardNumId.replace(numId, running, running.toFailed(throwable))) {
                logger.warn("deletion for shard {} failed", Integer.toUnsignedString(numId), throwable);
            }
            return;
        }

        final FinishedTask finishedTask = running.toFinished();
        if (!tasksByShardNumId.replace(numId, running, finishedTask)) {
            // entry was removed or replaced in the meantime
            return;
        }

        final TaskStats stats = finishedTask.getStats();
        logger.info("deletion for shard {} finished {}", Integer.toUnsignedString(numId), stats);

        if (stats.getDeletedMetrics() > 0) {
            // TODO: must be implemented more sophisticated strategy to update storage memory state:
            //      1) removing many metrics from huge collections
            //      2) removing few metrics from huge collections
            //      3) removing few metrics from small collections
            storage.reload();

            metrics.metricsDeleted.add(stats.getDeletedMetrics());
        }
    }

    /**
     * Reads metrics meta for every metric of the batch from its Stockpile shard.
     *
     * @param batch metrics that all belong to one Stockpile shard
     * @return future with the loaded meta; failed if the request cannot be built or sent
     */
    @Override
    public CompletableFuture<List<MetricMeta>> loadMeta(Batcher.LoadMetaBatch batch) {
        try {
            var requestBuilder = ReadMetricsMetaRequest.newBuilder();
            requestBuilder.setShardId(batch.getStockpileShardId());
            for (int index = 0; index < batch.size(); index++) {
                var metric = batch.getMetric(index);
                requestBuilder.addLocalIds(metric.getLocalId());
            }

            CompletableFuture<List<MetricMeta>> promise = new CompletableFuture<>();
            loadMetaWithRetries(promise, requestBuilder.build(), 0);
            metrics.metaLoads.forFuture(promise);
            return promise;
        } catch (Throwable error) {
            return CompletableFuture.failedFuture(error);
        }
    }

    /**
     * Sends the read meta request and completes {@code promise} with the result.
     *
     * <p>A failed call or a non-OK status is retried with an exponentially growing delay
     * (1s, 2s, 4s, ...) until {@code MAX_RETRIES} retries were made; after that the
     * promise is completed exceptionally. Retrying stops as soon as the promise is done.
     *
     * @param promise future to complete
     * @param request request to send
     * @param retry   zero-based number of the current attempt
     */
    private void loadMetaWithRetries(CompletableFuture<List<MetricMeta>> promise, ReadMetricsMetaRequest request, int retry) {
        stockpileClient.readMetricsMeta(request)
            .whenComplete((response, throwable) -> {
                try {
                    if (throwable != null) {
                        if (retry >= MAX_RETRIES) {
                            promise.completeExceptionally(throwable);
                        }
                    } else if (response.getStatus() == EStockpileStatusCode.OK) {
                        promise.complete(response.getMetaList());
                    } else if (retry >= MAX_RETRIES) {
                        String msg = response.getStatus() + ": " + response.getStatusMessage();
                        promise.completeExceptionally(new RuntimeException(msg));
                    }

                    // retry after some delay
                    if (!promise.isDone()) {
                        scheduler.schedule(() -> {
                            try {
                                loadMetaWithRetries(promise, request, retry + 1);
                            } catch (Throwable t) {
                                // a retry that fails to even start must not leave the promise hanging
                                promise.completeExceptionally(t);
                            }
                        }, 1L << retry, TimeUnit.SECONDS); // 1s, 2s, 4s, 8s, ...
                    }
                } catch (Throwable t) {
                    // e.g. the scheduler rejected the retry
                    promise.completeExceptionally(t);
                }
            });
    }

    /**
     * Resolves the resources of the batch for the given cloud.
     *
     * @param cloudId cloud the resources belong to
     * @param batch   batch carrying the resource ids to resolve
     * @return future with the resolved resources; failed if the lookup cannot be started
     */
    @Override
    public CompletableFuture<ResourceMap> loadResources(String cloudId, LoadResourceBatch batch) {
        final CompletableFuture<ResourceMap> promise = new CompletableFuture<>();
        // register for metrics before the lookup has a chance to complete the promise
        metrics.resourceLoads.forFuture(promise);
        try {
            loadResourcesWithRetries(promise, cloudId, batch, 0);
        } catch (Throwable error) {
            promise.completeExceptionally(error);
        }
        return promise;
    }

    /**
     * Asks the resource finder to resolve the batch and completes {@code promise} with
     * the result.
     *
     * <p>A failed lookup is retried with an exponentially growing delay (1s, 2s, 4s, ...)
     * until {@code MAX_RETRIES} retries were made; after that the promise is completed
     * exceptionally. Retrying stops as soon as the promise is done.
     *
     * @param promise future to complete
     * @param cloudId cloud the resources belong to
     * @param batch   batch carrying the resource ids to resolve
     * @param retry   zero-based number of the current attempt
     */
    private void loadResourcesWithRetries(CompletableFuture<ResourceMap> promise, String cloudId, LoadResourceBatch batch, int retry) {
        resourceFinder.resolve(cloudId, batch.resourceIds(), 0)
                .whenComplete((response, throwable) -> {
                    try {
                        if (throwable != null) {
                            if (retry >= MAX_RETRIES) {
                                promise.completeExceptionally(throwable);
                            }
                        } else {
                            promise.complete(response);
                        }

                        // retry after some delay
                        if (!promise.isDone()) {
                            scheduler.schedule(() -> {
                                try {
                                    loadResourcesWithRetries(promise, cloudId, batch, retry + 1);
                                } catch (Throwable t) {
                                    // a retry that fails to even start must not leave the promise hanging
                                    promise.completeExceptionally(t);
                                }
                            }, 1L << retry, TimeUnit.SECONDS); // 1s, 2s, 4s, 8s, ...
                        }
                    } catch (Throwable t) {
                        // e.g. the scheduler rejected the retry
                        promise.completeExceptionally(t);
                    }
                });
    }

    /**
     * Deletes the metrics of the batch: first their data in Stockpile and, only after
     * that succeeded, their records via the metrics DAO of the shard.
     *
     * @param numId numeric shard id
     * @param batch metrics to delete
     * @return future completed on {@code executor} once both deletions are done
     */
    @Override
    public CompletableFuture<Void> delete(int numId, DeleteBatch batch) {
        final CompletableFuture<Void> fromStockpile = deleteFromStockpile(batch);
        metrics.deletionsStockpile.forFuture(fromStockpile);

        final CompletableFuture<Void> fromKikimr =
            fromStockpile.thenCompose(ignore -> deleteFromKikimr(numId, batch));
        metrics.deletionsKikimr.forFuture(fromKikimr);

        // hop to the deletion executor before handing control back to the caller's callbacks
        return fromKikimr.thenRunAsync(() -> { }, executor);
    }

    /**
     * Builds a binary write request containing a delete record for every metric of the
     * batch and sends it to the Stockpile shard of the batch.
     *
     * @param batch metrics to delete
     * @return future completed when Stockpile accepted the request; failed if the
     *         request cannot be built or sent
     */
    private CompletableFuture<Void> deleteFromStockpile(Batcher.DeleteBatch batch) {
        try {
            var requestBuilder = new StockpileShardWriteRequestBuilder(
                EProjectId.UNKNOWN,
                OwnerField.UNKNOWN_SHARD_ID);
            for (int index = 0; index < batch.size(); index++) {
                var metric = batch.getMetric(index);
                requestBuilder.addDeleteData(metric.getLocalId());
            }

            // the built request is a resource: serialize it and release it right away
            final byte[] serialized;
            try (var writeRequest = requestBuilder.build()) {
                serialized = writeRequest.serialize();
            }

            TWriteDataBinaryRequest request = TWriteDataBinaryRequest.newBuilder()
                .setShardId(batch.getStockpileShardId())
                .setContent(UnsafeByteOperations.unsafeWrap(serialized))
                .build();

            CompletableFuture<Void> promise = new CompletableFuture<>();
            deleteFromStockpileWithRetries(promise, request, 0);
            return promise;
        } catch (Throwable error) {
            return CompletableFuture.failedFuture(error);
        }
    }

    /**
     * Sends the binary write request and completes {@code promise} once Stockpile
     * answered with an OK status.
     *
     * <p>A failed call or a non-OK status is retried with an exponentially growing delay
     * (1s, 2s, 4s, ...) until {@code MAX_RETRIES} retries were made; after that the
     * promise is completed exceptionally. Retrying stops as soon as the promise is done.
     *
     * @param promise future to complete
     * @param request request to send
     * @param retry   zero-based number of the current attempt
     */
    private void deleteFromStockpileWithRetries(CompletableFuture<Void> promise, TWriteDataBinaryRequest request, int retry) {
        stockpileClient.writeDataBinary(request)
            .whenComplete((response, throwable) -> {
                try {
                    if (throwable != null) {
                        if (retry >= MAX_RETRIES) {
                            promise.completeExceptionally(throwable);
                        }
                    } else if (response.getStatus() == EStockpileStatusCode.OK) {
                        promise.complete(null);
                    } else if (retry >= MAX_RETRIES) {
                        String msg = response.getStatus() + ": " + response.getStatusMessage();
                        promise.completeExceptionally(new RuntimeException(msg));
                    }

                    // retry after some delay
                    if (!promise.isDone()) {
                        scheduler.schedule(() -> {
                            try {
                                deleteFromStockpileWithRetries(promise, request, retry + 1);
                            } catch (Throwable t) {
                                // a retry that fails to even start must not leave the promise hanging
                                promise.completeExceptionally(t);
                            }
                        }, 1L << retry, TimeUnit.SECONDS); // 1s, 2s, 4s, 8s, ...
                    }
                } catch (Throwable t) {
                    // e.g. the scheduler rejected the retry
                    promise.completeExceptionally(t);
                }
            });
    }

    /**
     * Deletes the metrics of the batch through the metrics DAO of the shard.
     *
     * <p>The call is issued with {@code asyncOperationTimeout} pushed as the thread-local
     * timeout; the timeout is popped again before returning.
     *
     * @param numId numeric shard id used to create the DAO
     * @param batch metrics to delete, identified by their labels
     * @return future of the DAO call; failed if the call cannot be issued
     */
    private CompletableFuture<Void> deleteFromKikimr(int numId, DeleteBatch batch) {
        final ThreadLocalTimeout.Handle timeoutHandle = ThreadLocalTimeout.push(asyncOperationTimeout);
        try {
            final List<Labels> labelsToDelete = new ArrayList<>(batch.size());
            for (int index = 0; index < batch.size(); index++) {
                var metric = batch.getMetric(index);
                labelsToDelete.add(metric.getLabels());
            }

            return metricsDaoFactory.create(numId).deleteMetrics(labelsToDelete);
        } catch (Throwable error) {
            return CompletableFuture.failedFuture(error);
        } finally {
            timeoutHandle.popSafely();
        }
    }

    /**
     * Remembers the new configuration and stops the tasks of shards that are not local
     * anymore. Failures are logged and never propagated to the caller.
     *
     * @param newConf freshly loaded configuration
     */
    @Override
    public void onConfigurationLoad(SolomonConfWithContext newConf) {
        lastConf = newConf;
        try {
            // work on a snapshot of the keys, stopDeletion() modifies the map
            final List<Integer> knownNumIds = new ArrayList<>(tasksByShardNumId.keySet());
            for (Integer boxedNumId : knownNumIds) {
                final int numId = boxedNumId;
                if (shardLocator.isLocal(numId)) {
                    continue;
                }
                stopDeletion(numId);
            }
        } catch (Throwable error) {
            logger.warn("cannot update configuration", error);
        }
    }

    /**
     * Translates a user supplied shard reference into a numeric shard id.
     *
     * @param shardId id of a configured shard, or an unsigned decimal numeric id
     * @return numeric id of the configured shard, otherwise the parsed number
     * @throws NullPointerException  if no configuration has been loaded yet
     * @throws NumberFormatException if the value is neither a known shard id nor a number
     */
    private int numIdByString(String shardId) {
        final var currentConf = Objects.requireNonNull(lastConf, "Config not loaded");
        final var knownShard = currentConf.getShardByIdOrNull(shardId);
        return knownShard != null
            ? knownShard.getNumId()
            : Integer.parseUnsignedInt(shardId);
    }

    /**
     * Renders the table of known deletion tasks on the manager page.
     *
     * @param p rendering context providing the HTML writer
     */
    @ExtraContent
    private void extraContent(ExtraContentParam p) {
        final HtmlWriterWithCommonLibraries writer = p.getHtmlWriter();
        final SolomonConfWithContext currentConf = lastConf;
        DeletionManagerWww.tasksInfo(writer, tasksByShardNumId, currentConf);
    }
}
