package ru.yandex.qloud.kikimr.healthcheck;

import com.codahale.metrics.health.HealthCheck;
import com.codahale.metrics.health.HealthCheckRegistry;
import com.google.common.collect.Maps;
import org.apache.commons.lang3.time.DateUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import ru.yandex.qloud.kikimr.jobs.DropTablesByTTLJob;
import ru.yandex.qloud.kikimr.transport.KikimrScheme;
import ru.yandex.qloud.kikimr.utils.TableUtils;

import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.inject.Inject;
import java.time.LocalDate;
import java.util.Collections;
import java.util.Date;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.stream.Collectors;

import static com.google.common.base.MoreObjects.firstNonNull;

/**
 * Registers health checks that report on the state of the TTL-based table cleanup
 * performed by {@link DropTablesByTTLJob}.
 *
 * <p>When the {@code kikimr.start.jobs} property is {@code true}, three checks are registered:
 * <ul>
 *   <li>{@code table_cleanup_enabled} - unhealthy while the cleanup job reports itself disabled;</li>
 *   <li>{@code table_cleanup_last_success} - unhealthy when the reference date (last successful
 *       cleanup, or the first evaluation of the check if there was none yet) is older than
 *       four job schedule periods;</li>
 *   <li>{@code table_cleanup_stuck_tables} - unhealthy when some table has been seen as stuck
 *       in more than {@link #MAX_FAILED_ATTEMPTS_BEFORE_CRITICAL} scans.</li>
 * </ul>
 * A single background thread rescans the table list every 60 minutes (first run after 10 minutes).
 * When the property is {@code false} nothing is registered or scheduled.
 *
 * @author violin
 */
@Component
public class TableCleanupHealthcheck {
    private static final Logger LOG = LoggerFactory.getLogger(TableCleanupHealthcheck.class);

    /**
     * A table whose stuck counter exceeds this value makes the stuck-tables check unhealthy.
     * NOTE(review): the counter is incremented once per scan in which the table is still present,
     * which is presumably used as a proxy for failed delete attempts - confirm.
     */
    private static final int MAX_FAILED_ATTEMPTS_BEFORE_CRITICAL = 3;

    private final ScheduledExecutorService scheduledExecutorService = Executors.newSingleThreadScheduledExecutor();

    @Inject
    private HealthCheckRegistry healthCheckRegistry;

    @Inject
    private KikimrScheme kikimrScheme;

    @Inject
    private DropTablesByTTLJob dropTablesByTTLJob;

    @Value("${kikimr.start.jobs}")
    private boolean startJobs;

    /**
     * Reference date for the last-success check: the last known successful cleanup date, or the
     * moment of the first check evaluation if no cleanup has been reported yet.
     */
    private volatile Date lastCheckDate;

    /** Table name -> number of scans in a row in which the table was considered stuck. */
    private volatile Map<String, Integer> stuckTables = Collections.emptyMap();

    /**
     * Schedules the periodic stuck-table scan and registers the health checks.
     * Does nothing when {@code kikimr.start.jobs} is {@code false}.
     */
    @PostConstruct
    public void init() {
        if (!startJobs) {
            return;
        }

        scheduledExecutorService.scheduleAtFixedRate(this::updateStuckTables, 10, 60, TimeUnit.MINUTES);

        healthCheckRegistry.register("table_cleanup_enabled", new HealthCheck() {
            @Override
            protected Result check() {
                return checkCleanupEnabled();
            }
        });

        LOG.debug("previous lastCheckDate = {}", lastCheckDate);
        healthCheckRegistry.register("table_cleanup_last_success", new HealthCheck() {
            @Override
            protected Result check() {
                return checkLastSuccess();
            }
        });

        healthCheckRegistry.register("table_cleanup_stuck_tables", new HealthCheck() {
            @Override
            protected Result check() {
                return checkStuckTables();
            }
        });
    }

    /**
     * Stops the background scan thread when the bean is destroyed, so the executor's
     * worker thread does not outlive the application context.
     */
    @PreDestroy
    public void shutdown() {
        scheduledExecutorService.shutdownNow();
    }

    /** Healthy while the cleanup job reports that cleanup is enabled. */
    private HealthCheck.Result checkCleanupEnabled() {
        return dropTablesByTTLJob.isCleanupEnabled()
                ? HealthCheck.Result.healthy("OK; table cleanup enabled")
                : HealthCheck.Result.unhealthy("ATTENTION: table cleanup disabled");
    }

    /**
     * Healthy while the reference date is less than four job schedule periods in the past.
     * The very first evaluation without any reported cleanup only records the current time
     * as the reference date and answers "no data".
     */
    private HealthCheck.Result checkLastSuccess() {
        final Date cleanupDate = dropTablesByTTLJob.getLastSuccessCleanupDate();
        // Work on a local snapshot so that one evaluation uses a single consistent value
        // even if another thread evaluates the same check concurrently.
        Date referenceDate = lastCheckDate;
        if (cleanupDate == null && referenceDate == null) {
            lastCheckDate = new Date();
            return HealthCheck.Result.healthy("no data");
        }

        if (cleanupDate != null) {
            referenceDate = cleanupDate;
            lastCheckDate = cleanupDate;
        }

        LOG.debug("current lastCheckDate = {}", referenceDate);

        final int threshold = dropTablesByTTLJob.getJobSchedulePeriodMinutes() * 4;
        final boolean healthy = DateUtils.addMinutes(referenceDate, threshold).after(new Date());

        final String lastSuccessMessage = cleanupDate != null
                ? "last successful cleanup on " + referenceDate
                : "no data about last successful cleanup date";
        return healthy
                ? HealthCheck.Result.healthy("OK; %s", lastSuccessMessage)
                : HealthCheck.Result.unhealthy("table cleanup broken; %s", lastSuccessMessage);
    }

    /**
     * Healthy while no table's stuck counter exceeds {@link #MAX_FAILED_ATTEMPTS_BEFORE_CRITICAL}.
     * The message always lists every currently stuck table with its counter.
     */
    private HealthCheck.Result checkStuckTables() {
        final Map<String, Integer> allStuckTables = stuckTables;
        final Map<String, Integer> permanentStuckTables = Maps.filterEntries(
                allStuckTables,
                entry -> entry.getValue() > MAX_FAILED_ATTEMPTS_BEFORE_CRITICAL
        );
        if (permanentStuckTables.isEmpty()) {
            return HealthCheck.Result.healthy("OK; all stuck tables (delete attempts): %s", allStuckTables);
        }
        return HealthCheck.Result.unhealthy(
                "FAIL; %d stuck tables have more than %d delete attempts : %s; all stuck tables: %s",
                permanentStuckTables.size(), MAX_FAILED_ATTEMPTS_BEFORE_CRITICAL,
                permanentStuckTables, allStuckTables
        );
    }

    /**
     * Rebuilds the stuck-table map from the current table listing.
     *
     * <p>A table counts as stuck when no date can be parsed from its name or when that date is
     * more than {@code maxDaysToKeepLog + 2} days before today. Its counter is the previous
     * counter plus one; tables that are no longer listed (or no longer stuck) drop out of the map.
     *
     * <p>Runtime failures are logged and swallowed on purpose: an exception escaping a task
     * scheduled with {@code scheduleAtFixedRate} suppresses all subsequent executions, which
     * would leave the health check reporting stale data forever.
     */
    private void updateStuckTables() {
        try {
            final Set<String> allTables = kikimrScheme.listAllQloudTables();
            final int maxDays = dropTablesByTTLJob.getMaxDaysToKeepLog();
            final LocalDate oldestAllowedDate = LocalDate.now().minusDays(maxDays + 2);
            // Snapshot of the previous counters; the field is replaced only after the new map is complete.
            final Map<String, Integer> previousStuckTables = stuckTables;

            stuckTables = allTables.stream()
                    .filter(table -> {
                        final LocalDate tableDate = TableUtils.parseDateFromTableName(table);
                        return tableDate == null || tableDate.isBefore(oldestAllowedDate);
                    })
                    .collect(Collectors.toMap(
                            Function.identity(),
                            table -> firstNonNull(previousStuckTables.get(table), 0) + 1
                    ));
        } catch (RuntimeException e) {
            LOG.error("Failed to update stuck tables; keeping previously collected data", e);
        }
    }
}
