package ru.yandex.webmaster3.worker.host;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.mutable.MutableLong;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import ru.yandex.webmaster3.core.data.WebmasterHostId;
import ru.yandex.webmaster3.core.util.IdUtils;
import ru.yandex.webmaster3.core.util.RetryUtils;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskState;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.storage.host.AllHostsCacheService;
import ru.yandex.webmaster3.storage.host.AllVerifiedHostsCacheService;
import ru.yandex.webmaster3.storage.host.dao.DeletedUnverifiedHostsYDao;
import ru.yandex.webmaster3.storage.host.dao.HostsYDao;
import ru.yandex.webmaster3.storage.host.service.HostService;
import ru.yandex.webmaster3.storage.user.service.UserHostsService;
import ru.yandex.webmaster3.storage.util.yt.YtColumn;
import ru.yandex.webmaster3.storage.util.yt.YtNode;
import ru.yandex.webmaster3.storage.util.yt.YtNodeAttributes;
import ru.yandex.webmaster3.storage.util.yt.YtPath;
import ru.yandex.webmaster3.storage.util.yt.YtSchema;
import ru.yandex.webmaster3.storage.util.yt.YtService;
import ru.yandex.webmaster3.worker.PeriodicTask;
import ru.yandex.webmaster3.worker.TaskSchedule;

/**
 * @author leonidrom
 */
@Component
@Slf4j
@RequiredArgsConstructor(onConstructor_ = @Autowired)
public class CleanUnverifiedHostsTask extends PeriodicTask<CleanUnverifiedHostsTask.TaskState> {
    private static final int MAX_HOSTS_TO_DELETE_PER_RUN = 3_000_000;
    private static final Duration MIN_HOST_AGE = Duration.standardDays(30);
    private static final RetryUtils.RetryPolicy RETRY_POLICY = RetryUtils.linearBackoff(5,
            Duration.standardSeconds(60));
    private static final int TOTAL_THREADS = 16;
    private static final int BATCH_SIZE = 4096;
    private static final YtSchema YT_TABLE_SCHEMA = new YtSchema();
    private static final YtColumn<String> HOST_ID = YT_TABLE_SCHEMA.addColumn("host_id", YtColumn.Type.STRING);

    private final AllVerifiedHostsCacheService allVerifiedHostsCacheService;
    private final AllHostsCacheService allHostsCacheService;
    private final DeletedUnverifiedHostsYDao deletedUnverifiedHostsYDao;
    private final HostService hostService;
    private final HostsYDao hostsYDao;
    private final UserHostsService userHostsService;
    private final YtService ytService;

    @Value("${application.folders.data}/clean_unverified_hosts.gz")
    private File hostsToCleanFile;

    @Value("${external.yt.service.arnold.root.default}/unverified_hosts_cleaned")
    private YtPath archiveDir;

    private Map<WebmasterHostId, DateTime> deleteCandidates;

    /**
     * Executes one full cleaning cycle: loads the current delete candidates, selects hosts
     * to clean into a local file, cleans them and finally archives the cleaned host ids to YT.
     *
     * @param runId identifier of this task run (not used by this method)
     * @return {@code Result.SUCCESS} when every stage completed without throwing
     * @throws Exception if any of the stages fails
     */
    @Override
    public Result run(UUID runId) throws Exception {
        setState(new TaskState());

        log.info("Collecting delete candidates");
        // The field is published right away; the local alias is just what the lambda fills.
        Map<WebmasterHostId, DateTime> candidates = new HashMap<>();
        deleteCandidates = candidates;
        deletedUnverifiedHostsYDao.forEach(row -> {
            // rows that already carry a delete date are not candidates any more
            if (row.getDeleteDate() != null) {
                return;
            }
            candidates.put(row.getHostId(), row.getAddDate());
        });
        log.info("Done collecting delete candidates: {}", deleteCandidates.size());

        // Stage 1: write the ids of hosts to clean into hostsToCleanFile
        log.info("Collecting hosts to clean...");
        collectHosts();
        log.info("Done collecting hosts to clean");

        // Stage 2: clean everything listed in the file
        log.info("Cleaning hosts...");
        cleanHosts();
        log.info("Done cleaning hosts");

        // Stage 3: archive the list to a table named after the current epoch second
        long epochSeconds = System.currentTimeMillis() / 1000;
        YtPath tablePath = YtPath.path(archiveDir, "hosts-" + epochSeconds);
        log.info("Uploading cleaned hosts to {} ...", tablePath);
        uploadHostsToYt(tablePath);
        log.info("Done uploading cleaned hosts");

        return Result.SUCCESS;
    }

    /**
     * Iterates over all hosts, processes them in batches of {@code BATCH_SIZE} on a bounded
     * thread pool and writes the ids of hosts selected for cleaning (one per line, gzip-compressed,
     * UTF-8) into {@code hostsToCleanFile}.
     *
     * <p>Iteration stops once at least {@code MAX_HOSTS_TO_DELETE_PER_RUN} hosts have been selected.
     * The counter is advanced by finished batches, so batches that were already submitted when the
     * limit is hit may push the final number somewhat over the limit. Already submitted batches are
     * always awaited before the output file is closed, and the task state is updated in both the
     * normal and the limit-reached case.
     *
     * @throws Exception if host iteration, any batch, or writing the output file fails
     */
    private void collectHosts() throws Exception {
        ExecutorService executorService = ru.yandex.common.util.concurrent.Executors.newBlockingFixedThreadPool(
                TOTAL_THREADS, TOTAL_THREADS,
                0, TimeUnit.MILLISECONDS,
                new ArrayBlockingQueue<>(TOTAL_THREADS),
                Executors.defaultThreadFactory());

        Set<WebmasterHostId> batch = new HashSet<>();
        List<Future<Integer>> futures = new ArrayList<>();
        var hostsToCleanCount = new AtomicLong();
        var totalHostsCount = new MutableLong();
        try (var pw = new PrintWriter(new BufferedWriter(new OutputStreamWriter(
                new GZIPOutputStream(new FileOutputStream(hostsToCleanFile)), StandardCharsets.UTF_8)))) {

            boolean limitReached = false;
            try {
                allHostsCacheService.foreachHost(hostId -> {
                    if (totalHostsCount.incrementAndGet() % 10_000 == 0) {
                        log.info("Processed hosts: {}", totalHostsCount.getValue());
                    }

                    if (hostsToCleanCount.get() >= MAX_HOSTS_TO_DELETE_PER_RUN) {
                        log.error("MAX_HOSTS_TO_DELETE_PER_RUN limit reached");
                        throw new DoneException();
                    }

                    batch.add(hostId);
                    if (batch.size() >= BATCH_SIZE) {
                        futures.add(submitCollectBatch(executorService, new ArrayList<>(batch), pw,
                                hostsToCleanCount));
                        batch.clear();
                    }
                });
            } catch (DoneException e) {
                // Only the iteration is aborted; batches already submitted must still be awaited
                // below, while the writer is open.
                limitReached = true;
            }

            // The trailing partial batch is only processed when the limit was not hit.
            if (!limitReached && !batch.isEmpty()) {
                futures.add(submitCollectBatch(executorService, new ArrayList<>(batch), pw, hostsToCleanCount));
            }

            for (var f : futures) {
                state.totalHostsCleaned += f.get();
            }

            state.totalHosts = totalHostsCount.getValue();

            // PrintWriter never throws on write failures; surface them explicitly so that
            // a truncated file is not silently treated as the complete list.
            if (pw.checkError()) {
                throw new java.io.IOException("Failed to write hosts to clean to " + hostsToCleanFile);
            }
        } catch (Exception e) {
            log.error("Error collecting unverified hosts", e);
            throw e;
        } finally {
            executorService.shutdownNow();
        }
    }

    /**
     * Submits one batch to the pool and adds the number of hosts it selected for cleaning
     * to the shared counter that drives the per-run limit.
     */
    private Future<Integer> submitCollectBatch(ExecutorService executorService, List<WebmasterHostId> hosts,
                                               PrintWriter pw, AtomicLong hostsToCleanCount) {
        return executorService.submit(() -> {
            int selected = collectHostsBatch(hosts, pw);
            hostsToCleanCount.addAndGet(selected);
            return selected;
        });
    }

    /**
     * Classifies every host of the batch and acts on it:
     * <ul>
     *   <li>verified host that is a known delete candidate: removed from the candidates table;</li>
     *   <li>unverified host that is not yet a candidate: collected and registered as a new candidate
     *       (one batched call at the end, stamped with the current time);</li>
     *   <li>unverified known candidate added more than {@code MIN_HOST_AGE} ago: its id is written to
     *       {@code pw} and counted.</li>
     * </ul>
     *
     * @param hosts hosts to examine
     * @param pw    writer receiving one line per host selected for cleaning
     * @return number of hosts written to {@code pw}
     * @throws InterruptedException if a retried storage call is interrupted
     */
    private int collectHostsBatch(List<WebmasterHostId> hosts, PrintWriter pw) throws InterruptedException {
        final DateTime oldestAllowedAddDate = DateTime.now().minus(MIN_HOST_AGE);
        final List<WebmasterHostId> newCandidates = new ArrayList<>();
        int selected = 0;

        for (WebmasterHostId hostId : hosts) {
            final DateTime candidateSince = deleteCandidates.get(hostId);
            final boolean knownCandidate = candidateSince != null;

            if (isVerified(hostId)) {
                if (knownCandidate) {
                    // an unverified but not yet deleted host got verified again:
                    // drop it from the candidates table
                    RetryUtils.execute(RETRY_POLICY, () -> {
                        deletedUnverifiedHostsYDao.delete(hostId);
                    });
                }
                continue;
            }

            if (!knownCandidate) {
                newCandidates.add(hostId);
                continue;
            }

            if (oldestAllowedAddDate.isAfter(candidateSince)) {
                // the host is old enough to be deleted
                pw.println(hostId);
                selected++;
            }
        }

        if (!newCandidates.isEmpty()) {
            RetryUtils.execute(RETRY_POLICY, () -> {
                deletedUnverifiedHostsYDao.addBatch(newCandidates, DateTime.now());
            });
        }

        return selected;
    }

    /**
     * Tells whether the host counts as verified. The cache is consulted first; the user-hosts
     * service is queried only when the cache does not contain the host.
     *
     * @param hostId host to check
     * @return {@code true} if either source reports the host as verified
     */
    private boolean isVerified(WebmasterHostId hostId) {
        if (allVerifiedHostsCacheService.contains(hostId)) {
            return true;
        }
        // to be on the safe side, double-check against the database as well
        return userHostsService.isHostVerified(hostId);
    }

    /**
     * Reads host ids from {@code hostsToCleanFile} (gzip, UTF-8, one id per line) and cleans them
     * in batches of {@code BATCH_SIZE} on a bounded thread pool, waiting for every batch to finish.
     *
     * <p>The input streams are closed when the method exits, whether it completes normally or throws.
     *
     * @throws Exception if reading the file fails or any batch fails
     */
    private void cleanHosts() throws Exception {
        ExecutorService executorService = ru.yandex.common.util.concurrent.Executors.newBlockingFixedThreadPool(
                TOTAL_THREADS, TOTAL_THREADS,
                0, TimeUnit.MILLISECONDS,
                new ArrayBlockingQueue<>(TOTAL_THREADS),
                Executors.defaultThreadFactory());

        List<Future<Void>> futures = new ArrayList<>();
        List<WebmasterHostId> batch = new ArrayList<>();
        // Two separate resources: the file stream is closed even if the gzip header cannot be read.
        try (var fis = new FileInputStream(hostsToCleanFile);
             var is = new GZIPInputStream(fis)) {
            var lineIterator = IOUtils.lineIterator(is, StandardCharsets.UTF_8);
            // read hosts from the file
            while (lineIterator.hasNext()) {
                var hostId = IdUtils.stringToHostId(lineIterator.next());
                batch.add(hostId);

                // hand a full batch over for processing
                if (batch.size() >= BATCH_SIZE) {
                    var batchCopy = new ArrayList<>(batch);
                    futures.add(executorService.submit(() -> cleanHostsBatch(batchCopy)));
                    batch.clear();
                }
            }

            if (!batch.isEmpty()) {
                // copy eagerly, same as in the loop above, so the task never shares the mutable list
                var batchCopy = new ArrayList<>(batch);
                futures.add(executorService.submit(() -> cleanHostsBatch(batchCopy)));
            }

            // wait until everything has been processed
            for (var f : futures) {
                f.get();
            }
        } finally {
            executorService.shutdownNow();
        }
    }

    /**
     * Cleans one batch of hosts and then marks them as deleted with the current time.
     * Both calls are executed as a single action under {@code RETRY_POLICY}.
     *
     * @param batch hosts to clean
     * @return always {@code null}; the {@code Void} return type lets the method be submitted as a task
     * @throws Exception if the retried action ultimately fails
     */
    private Void cleanHostsBatch(List<WebmasterHostId> batch) throws Exception {
        // NOTE(review): presumably a retry re-runs the whole lambda, i.e. cleanUnverifiedHosts is
        // repeated when only markDeleted failed — confirm both calls are safe to repeat.
        RetryUtils.execute(RETRY_POLICY, () -> {
            hostService.cleanUnverifiedHosts(batch);
            deletedUnverifiedHostsYDao.markDeleted(batch, DateTime.now());
        });

        return null;
    }

    /**
     * Archives the list of cleaned hosts: creates a YT table at {@code tablePath} with the
     * single-column {@code host_id} schema and writes every line of {@code hostsToCleanFile}
     * (gzip, UTF-8) into it as one row. Creation and writing happen inside one YT transaction.
     *
     * <p>The input streams are closed when the row-producing callback exits, whether it completes
     * normally or throws.
     *
     * @param tablePath destination table
     */
    private void uploadHostsToYt(YtPath tablePath) {
        var tableData = ytService.prepareTableData(tablePath.getName(), tableWriter -> {
            // Two separate resources: the file stream is closed even if the gzip header cannot be read.
            try (var fis = new FileInputStream(hostsToCleanFile);
                 var is = new GZIPInputStream(fis)) {
                var lineIterator = IOUtils.lineIterator(is, StandardCharsets.UTF_8);
                while (lineIterator.hasNext()) {
                    var hostId = lineIterator.next();
                    HOST_ID.set(tableWriter, hostId);
                    tableWriter.rowEnd();
                }
            } catch (Exception e) {
                throw new RuntimeException(e);
            }

        });

        ytService.inTransaction(tablePath).execute(cypressService -> {
            var attributes = new YtNodeAttributes().setSchema(YT_TABLE_SCHEMA);
            cypressService.create(tablePath, YtNode.NodeType.TABLE, true, attributes);
            cypressService.writeTable(tablePath, tableData);

            return true;
        });
    }

    /**
     * @return the periodic task type of this task, {@code CLEAN_UNVERIFIED_HOSTS}
     */
    @Override
    public PeriodicTaskType getType() {
        return PeriodicTaskType.CLEAN_UNVERIFIED_HOSTS;
    }

    /**
     * @return a cron-based schedule built from the expression {@code "0 0 0 * * 1"}
     */
    @Override
    public TaskSchedule getSchedule() {
        // NOTE(review): looks like a six-field cron (sec min hour day month day-of-week), i.e. once a
        // week at 00:00:00; which weekday "1" denotes depends on the cron dialect TaskSchedule uses — confirm.
        return TaskSchedule.startByCron("0 0 0 * * 1");
    }

    /**
     * Marker exception thrown from the host-iteration callback in {@code collectHosts} to stop the
     * iteration early once the per-run limit has been reached; it is caught there and not propagated.
     */
    private static class DoneException extends RuntimeException {
    }

    /**
     * State reported by this task; a fresh instance is installed at the start of every run.
     */
    public static class TaskState implements PeriodicTaskState {
        // number of hosts seen while iterating over all hosts
        public long totalHosts;
        // number of hosts selected for cleaning (sum of the per-batch counts)
        public long totalHostsCleaned;
    }
}
