package ru.yandex.webmaster3.worker.clickhouse;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.node.TextNode;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.joda.time.Instant;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import ru.yandex.webmaster3.core.solomon.HandleCommonMetricsService;
import ru.yandex.webmaster3.core.solomon.Indicators;
import ru.yandex.webmaster3.core.solomon.SolomonSensor;
import ru.yandex.webmaster3.core.util.json.JsonMapping;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskState;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.core.worker.task.TaskResult;
import ru.yandex.webmaster3.storage.util.clickhouse2.ClickhouseHost;
import ru.yandex.webmaster3.storage.util.clickhouse2.ClickhouseHostLocation;
import ru.yandex.webmaster3.storage.util.clickhouse2.MdbClickhouseServer;
import ru.yandex.webmaster3.storage.util.yt.YtJobSpec;
import ru.yandex.webmaster3.storage.util.yt.YtNode;
import ru.yandex.webmaster3.storage.util.yt.YtNodeAttributes;
import ru.yandex.webmaster3.storage.util.yt.YtOperation;
import ru.yandex.webmaster3.storage.util.yt.YtOperationFilePathSpec;
import ru.yandex.webmaster3.storage.util.yt.YtOperationId;
import ru.yandex.webmaster3.storage.util.yt.YtPath;
import ru.yandex.webmaster3.storage.util.yt.YtService;
import ru.yandex.webmaster3.storage.util.yt.YtVanillaCommand;
import ru.yandex.webmaster3.storage.ytimport.YtClickhouseImportStateEnum;
import ru.yandex.webmaster3.storage.ytimport.dao.YtClickhouseImportQueueYDao;
import ru.yandex.webmaster3.storage.ytimport.dao.YtClickhouseImportQueueYDao.YtClickhouseImportRecord;
import ru.yandex.webmaster3.storage.ytimport.dao.YtClickhouseImportQueueYDao.YtClickhouseImportTable;
import ru.yandex.webmaster3.worker.PeriodicTask;
import ru.yandex.webmaster3.worker.TaskSchedule;
import ru.yandex.webmaster3.worker.ytimport.YtImportTaskInfo.CompressionMode;

/**
 * Created by Oleg Bazdyrev on 29/01/2021.
 */
@Slf4j
@Service
@RequiredArgsConstructor(onConstructor_ = @Autowired)
public class MdbYtClickhouseImportTask extends PeriodicTask<MdbYtClickhouseImportTask.TaskState> {

    // Value of the section label attached to every Solomon sensor emitted by this task.
    private static final String SECTION_NAME = "yt2ch_import";
    // Name of the Cypress node attribute under which a job description is stored on the job node.
    private static final String ATTR_SPEC = "spec";
    // Attribute of the per-cluster tasks node that keeps the id of the started uploader operation.
    private static final String ATTR_UPLOADER_OP_ID = "clickhouseUploaderOperationId";
    // NOTE(review): currently not referenced anywhere in this class.
    private static final Duration MAX_UPLOADER_OP_AGE = Duration.standardDays(1L);
    // YT clusters for which an uploader operation is maintained and task directories are scanned.
    private static final List<String> CLUSTERS = Arrays.asList("hahn", "arnold");
    // Table (under tasksBasePath) into which uploader logs are merged, sorted by the "started" column.
    private static final String LOGS_TABLE_NAME = "logs";
    // Table (under tasksBasePath) passed to the uploader as its log table; merged into LOGS_TABLE_NAME periodically.
    private static final String LOGS_CURRENT_TABLE_NAME = "logs-current";
    // A child of a cluster tasks directory is treated as a per-host job directory only if its name fully
    // matches this pattern, i.e. looks like "<dc>-<shard>" as produced by chHostId.
    private static final Pattern DC_SHARD_PATTERN = Pattern.compile("[a-zA-Z-]+-[0-9]+");

    // Clickhouse cluster the data is imported into; also the source of host list and service credentials.
    private final MdbClickhouseServer legacyMdbClickhouseServer;
    // Entry point for all Cypress/YT operations performed by this task.
    private final YtService ytService;
    // Queue of import requests: active records are read from it and their progress is written back.
    private final YtClickhouseImportQueueYDao ytClickhouseImportQueueYDao;
    // Receiver of the monitoring sensors built in run().
    private final HandleCommonMetricsService handleCommonMetricsService;

    // Root directory containing one tasks subdirectory per cluster plus the uploader log tables.
    @Value("${external.yt.service.locke.root.default}/importer/legacy-tasks")
    private YtPath tasksBasePath;
    // Path of the uploader binary that is attached to every vanilla job as the file "uploader".
    @Value("${external.yt.service.hahn.root.default}/bin/clickhouse-uploader")
    private YtPath uploaderBinary;
    // Logbroker OAuth token, handed to the uploader through the secure vault as LB_OAUTH_TOKEN.
    @Value("${webmaster3.core.logbroker.oauth.token}")
    private String logbrokerOauthToken;
    // Logbroker topic passed to the uploader via --lb_log_topic.
    @Value("${webmaster3.core.logbroker.clickhouseUploaderEvents.topic}")
    private String eventsTopic;

    /**
     * Creates, inside a single transaction on {@code tasksBasePath}, one map node per cluster
     * listed in {@code CLUSTERS}. These nodes are the per-cluster task directories that the
     * rest of the task lists and writes job nodes into.
     */
    public void init() {
        ytService.inTransaction(tasksBasePath).execute(cypress -> {
            for (String clusterName : CLUSTERS) {
                YtPath clusterTasksPath = YtPath.path(tasksBasePath, clusterName);
                cypress.create(clusterTasksPath, YtNode.NodeType.MAP_NODE, true, null, true);
            }
            return true;
        });
    }

    /**
     * Builds the job description for one table of an import record.
     * <p>
     * The Clickhouse table name is qualified with the record's database, the start time is the
     * current moment, the job is created as not done and always uses GZIP compression.
     *
     * @param record import record the table belongs to
     * @param table  table of that record to be imported
     * @return a new job description
     */
    private static YtClickhouseImportJob createJob(YtClickhouseImportRecord record, YtClickhouseImportTable table) {
        return YtClickhouseImportJob.builder()
                .id(record.getId())
                .started(DateTime.now())
                .ytTable(table.getYtTable())
                .chTable(record.getDatabase() + "." + table.getChTable())
                .chCreateSpec(table.getChCreateSpec())
                .chInsertSpec(record.getChInsertSpec())
                .dc(table.getDc())
                .shard(table.getShard())
                .done(false)
                .dataCompressionMode(CompressionMode.GZIP)
                .build();
    }

    /**
     * Returns the identifier of a Clickhouse host in the form {@code <dcName>-<shard>}.
     * The same string is used as the name of the host's job directory and as the vanilla task name.
     *
     * @param host Clickhouse host
     * @return DC name and shard joined with a dash
     */
    private static String chHostId(ClickhouseHost host) {
        StringBuilder hostId = new StringBuilder();
        hostId.append(host.getDcName());
        hostId.append("-");
        hostId.append(host.getShard());
        return hostId.toString();
    }

    /**
     * Runs one scheduling cycle of the YT to Clickhouse import.
     * <ol>
     *   <li>Makes sure the uploader operation is started on every cluster from {@code CLUSTERS};
     *       a failure for one cluster is logged and does not stop the cycle.</li>
     *   <li>Scans the per-host job directories: nodes of finished jobs are removed, unfinished jobs are
     *       collected per host.</li>
     *   <li>Emits data-age and queue-size sensors for every host directory found.</li>
     *   <li>Marks the tables of finished jobs as done in the import queue.</li>
     *   <li>For every not yet done table of the active records that has no running job, drops the target
     *       table on all candidate hosts and creates a job node for the least loaded candidate.</li>
     * </ol>
     *
     * @param runId identifier of this run (not used by the implementation)
     * @return result with {@link TaskResult#SUCCESS}
     * @throws InterruptedException  if the thread is interrupted while the uploader is being checked
     * @throws IllegalStateException if no Clickhouse host matches the shard/DC requested by a table
     * @throws Exception             any other failure raised by the YT, Clickhouse or queue calls
     */
    @Override
    public Result run(UUID runId) throws Exception {
        // check that uploader vanilla job is running
        for (String cluster : CLUSTERS) {
            try {
                startClickhouseUploader(cluster);
            } catch (InterruptedException e) {
                // interruption is a request to stop, not an uploader failure: restore the flag and abort the run
                Thread.currentThread().interrupt();
                throw e;
            } catch (Exception e) {
                // best effort: jobs can still be queued even if the uploader could not be (re)started
                log.error("Could not start uploader on cluster {}:", cluster, e);
            }
        }
        setState(new TaskState());
        ytService.withoutTransaction(cypressService -> {
            // update subtask statuses
            List<YtClickhouseImportJob> finishedJobs = new ArrayList<>();
            Set<YtClickhouseImportJob> runningJobs = new HashSet<>();
            Map<String, List<YtClickhouseImportJob>> runningJobsMap = new HashMap<>();
            List<YtPath> chTasksPaths = new ArrayList<>();
            for (String cluster : CLUSTERS) {
                chTasksPaths.addAll(cypressService.list(YtPath.path(tasksBasePath, cluster)));
            }
            for (YtPath chTasksPath : chTasksPaths) {
                String chName = chTasksPath.getName();
                // skip everything that is not a "<dc>-<shard>" job directory
                if (!DC_SHARD_PATTERN.matcher(chName).matches()) {
                    continue;
                }
                // the entry is created even for an empty directory so that the host still gets its sensors
                List<YtClickhouseImportJob> hostJobs = runningJobsMap.computeIfAbsent(chName, (k) -> new ArrayList<>());
                // check all subtask
                for (YtPath subtaskNode : cypressService.list(chTasksPath)) {
                    YtNode node = cypressService.getNode(subtaskNode);
                    // read the spec from the same attribute it is written to when the job is created
                    YtClickhouseImportJob job = JsonMapping.readValue(node.getNodeMeta().get(ATTR_SPEC).traverse(), YtClickhouseImportJob.class);
                    if (job.isDone()) {
                        finishedJobs.add(job);
                        cypressService.remove(subtaskNode);
                        getState().getFinishedJobs().add(job);
                    } else {
                        runningJobs.add(job);
                        hostJobs.add(job);
                    }
                }
            }
            // collect stats for monitoring
            DateTime now = DateTime.now();
            List<SolomonSensor> sensors = new ArrayList<>();
            for (Map.Entry<String, List<YtClickhouseImportJob>> entry : runningJobsMap.entrySet()) {
                // data age: seconds since the oldest unfinished job was started (0 when the queue is empty)
                DateTime minStartDate = entry.getValue().stream().map(YtClickhouseImportJob::getStarted).min(Comparator.naturalOrder()).orElse(now);
                sensors.add(SolomonSensor.createAligned(300, (now.getMillis() - minStartDate.getMillis()) / 1000)
                        .withLabel(SolomonSensor.LABEL_SECTION, SECTION_NAME)
                        .withLabel("clickhouse_host", entry.getKey())
                        .withLabel(SolomonSensor.LABEL_INDICATOR, Indicators.DATA_AGE));
                sensors.add(SolomonSensor.createAligned(300, entry.getValue().size())
                        .withLabel(SolomonSensor.LABEL_SECTION, SECTION_NAME)
                        .withLabel("clickhouse_host", entry.getKey())
                        .withLabel(SolomonSensor.LABEL_INDICATOR, Indicators.QUEUE_SIZE));
            }
            handleCommonMetricsService.handle(sensors, 100 * sensors.size());
            // updating affected tasks
            updateFinishedTasks(finishedJobs);

            // shuffle once so that ties between equally loaded hosts are not always won by the same host
            List<ClickhouseHost> allChHosts = new ArrayList<>(legacyMdbClickhouseServer.getHosts());
            Collections.shuffle(allChHosts);
            Map<Integer, List<ClickhouseHost>> chHostsByShard = allChHosts.stream().collect(Collectors.groupingBy(ClickhouseHostLocation::getShard));
            // a job without a shard may go to any host; use the shuffled list here as well
            chHostsByShard.put(null, allChHosts);
            // create new and maybe reorder existing tasks
            for (YtClickhouseImportRecord record : ytClickhouseImportQueueYDao.listActive()) {
                // get unfinished tables
                for (YtClickhouseImportTable table : record.getTables()) {
                    if (table.isDone()) {
                        continue;
                    }
                    YtClickhouseImportJob job = createJob(record, table);
                    // equality ignores id/start time, so this detects a job already queued for the same table
                    if (runningJobs.contains(job)) {
                        continue;
                    }
                    // find hosts that may take the job (fails with a descriptive error if there are none)
                    List<ClickhouseHost> chHosts = findCandidateHosts(chHostsByShard, job);
                    // preventive tables cleaning
                    chHosts.forEach(chHost -> legacyMdbClickhouseServer.execute(chHost, "DROP TABLE IF EXISTS " +
                            record.getDatabase() + "." + table.getChTable() + " NO DELAY"));
                    // pick the candidate with the fewest unfinished jobs
                    ClickhouseHost host = chHosts.stream()
                            .min(Comparator.comparingInt(c -> runningJobsMap.getOrDefault(chHostId(c), Collections.emptyList()).size()))
                            .orElseThrow();

                    // create job
                    String jobName = record.getPriority().value() + "_" + record.getId().toString() + "_" + record.getDatabase() + "_" + table.getChTable();
                    YtPath jobPath = YtPath.path(YtPath.path(YtPath.path(tasksBasePath, job.getYtTable().getCluster()), chHostId(host)), jobName);
                    cypressService.create(jobPath, YtNode.NodeType.MAP_NODE, true, YtNodeAttributes.create(ATTR_SPEC, job));
                    runningJobs.add(job);
                    runningJobsMap.computeIfAbsent(chHostId(host), (ign) -> new ArrayList<>()).add(job);
                    getState().getNewJobs().add(job);
                }
            }
            return true;
        });

        return new Result(TaskResult.SUCCESS);
    }

    /**
     * Marks the tables covered by the given finished jobs as done in the import queue and switches a
     * record to {@link YtClickhouseImportStateEnum#DONE} once all of its tables are done.
     * Records that are no longer present in the queue are skipped.
     */
    private void updateFinishedTasks(List<YtClickhouseImportJob> finishedJobs) {
        Map<UUID, List<YtClickhouseImportJob>> finishedJobsMap = finishedJobs.stream().collect(Collectors.groupingBy(YtClickhouseImportJob::getId));
        finishedJobsMap.forEach((id, jobs) -> {
            YtClickhouseImportRecord state = ytClickhouseImportQueueYDao.getTask(id);
            if (state != null) {
                state.getTables().replaceAll(t -> {
                    // a table is matched to a finished job through the job's equality (table names, shard, dc)
                    if (jobs.contains(createJob(state, t))) {
                        return t.toBuilder().done(true).build();
                    } else {
                        return t;
                    }
                });
                var builder = state.toBuilder();
                if (state.getTables().stream().allMatch(YtClickhouseImportTable::isDone)) {
                    builder.state(YtClickhouseImportStateEnum.DONE);
                }
                builder.lastUpdate(Instant.now());
                ytClickhouseImportQueueYDao.update(builder.build());
            }
        });
    }

    /**
     * Returns the hosts that may execute the job: the hosts of the job's shard (all hosts when the shard
     * is {@code null}), restricted to the job's DC when one is set.
     *
     * @throws IllegalStateException if the shard is unknown or no host is left after DC filtering
     */
    private static List<ClickhouseHost> findCandidateHosts(Map<Integer, List<ClickhouseHost>> chHostsByShard,
                                                           YtClickhouseImportJob job) {
        List<ClickhouseHost> candidates = chHostsByShard.get(job.getShard());
        if (candidates == null) {
            throw new IllegalStateException("No Clickhouse hosts found for shard " + job.getShard()
                    + " requested by table " + job.getChTable());
        }
        if (job.getDc() != null) {
            candidates = candidates.stream().filter(c -> c.getDcName().equals(job.getDc())).collect(Collectors.toList());
        }
        if (candidates.isEmpty()) {
            throw new IllegalStateException("No Clickhouse hosts found for shard " + job.getShard()
                    + " and dc " + job.getDc() + " requested by table " + job.getChTable());
        }
        return candidates;
    }

    /**
     * Checks the uploader operation of the given cluster and restarts it when needed, then merges the
     * current uploader log table into the main one.
     * <p>
     * The operation id is read from the {@code ATTR_UPLOADER_OP_ID} attribute of the cluster tasks node.
     * If the attribute or the operation is missing, or the operation is in a terminal state, a new vanilla
     * operation with one task per Clickhouse host is started and its id is written back to the attribute.
     * Afterwards, if the current log table exists and has at least 100 rows, it is moved aside, sorted
     * together with the existing log table (if any) by {@code started} into the log table, and the moved
     * copy is removed.
     *
     * @param cluster name of the YT cluster the uploader runs on
     * @throws InterruptedException declared for the underlying YT call
     */
    private void startClickhouseUploader(String cluster) throws InterruptedException {
        ytService.withoutTransaction(cypress -> {
            log.info("Checking for YT to Clickhouse uploader on cluster {}", cluster);
            YtPath clusterTasksPath = YtPath.path(tasksBasePath, cluster);
            YtNode clusterTasksNode = cypress.getNode(clusterTasksPath);

            // look up the operation recorded on the cluster tasks node, if any
            YtOperation uploaderOperation = null;
            if (!clusterTasksNode.getNodeMeta().has(ATTR_UPLOADER_OP_ID)) {
                log.info("Attribute {} not found", ATTR_UPLOADER_OP_ID);
            } else {
                String storedOpId = clusterTasksNode.getNodeMeta().get(ATTR_UPLOADER_OP_ID).asText();
                log.info("YT to Clickhouse uploader operation id: {}", storedOpId);
                uploaderOperation = cypress.getOperation(YtOperationId.fromString(storedOpId));
                if (uploaderOperation != null) {
                    log.info("Operation {} has state {}", storedOpId, uploaderOperation.getState());
                } else {
                    log.info("Operation {} was lost", storedOpId);
                }
            }

            // check that operation is running
            YtPath currentLogs = YtPath.path(tasksBasePath, LOGS_CURRENT_TABLE_NAME);
            boolean restartNeeded = uploaderOperation == null || uploaderOperation.getState().isTerminal();
            if (restartNeeded) {
                log.info("Operation doesn't exists or aborted. Restarting");
                var commandBuilder = YtVanillaCommand.newBuilder();
                // one vanilla task per Clickhouse host, each watching its own job directory
                for (ClickhouseHost chHost : legacyMdbClickhouseServer.getHosts()) {
                    String hostId = chHostId(chHost);
                    YtPath hostTasksPath = YtPath.path(clusterTasksPath, hostId);
                    cypress.create(hostTasksPath, YtNode.NodeType.MAP_NODE, true, null, true);
                    YtOperationFilePathSpec uploaderFile =
                            new YtOperationFilePathSpec(uploaderBinary.toYtPath(), true, "uploader");
                    YtJobSpec hostJobSpec = YtJobSpec.newBuilder()
                            .setCommand(uploaderFile, Arrays.asList(
                                    "--ch_address", chHost.getHostURI().getHost(),
                                    "--yt_tasks_path", hostTasksPath.getPathWithoutCluster(),
                                    "--ch_username", legacyMdbClickhouseServer.getServiceUserCredentials().getUserName(),
                                    "--yt_log_table", currentLogs.toString(),
                                    "--lb_log_topic", eventsTopic
                            ))
                            .addEnvironmentVariable("YT_FORBID_REQUESTS_FROM_JOB", "0")
                            .addEnvironmentVariable("YT_ALLOW_HTTP_REQUESTS_TO_YT_FROM_JOB", "1")
                            .setMemoryLimit(2L * 1024L * 1024L * 1024L) // 2G
                            .setJobCount(1)
                            .build();
                    commandBuilder.addTask(hostId, hostJobSpec);
                }
                // secrets travel through the secure vault rather than the command line
                // (a stderr_table_path spec may be added here when debugging the uploader)
                YtVanillaCommand uploaderCommand = commandBuilder
                        .setYtCluster(cluster)
                        .addSecureVault("YT_TOKEN", ytService.getClusters().get(cluster).getToken())
                        .addSecureVault("CH_PASSWORD", legacyMdbClickhouseServer.getServiceUserCredentials().getPassword())
                        .addSecureVault("LB_OAUTH_TOKEN", logbrokerOauthToken)
                        .addSpec("title", "YT to Clickhouse Uploader")
                        .addSpec("max_failed_job_count", 1)
                        .build();
                YtOperationId startedOpId = cypress.startOperation(uploaderCommand);
                // remember the operation so that the next run can check its state
                cypress.set(YtPath.attribute(clusterTasksPath, ATTR_UPLOADER_OP_ID), TextNode.valueOf(startedOpId.toString()));
            }

            // merge logs if needed
            if (!cypress.exists(currentLogs)) {
                return true;
            }
            YtNode currentLogsNode = cypress.getNode(currentLogs);
            if (currentLogsNode == null || currentLogsNode.getNodeMeta().get("row_count").asLong() < 100) {
                return true;
            }
            YtPath mergedLogs = YtPath.path(tasksBasePath, LOGS_TABLE_NAME);
            YtPath detachedLogs = YtPath.path(tasksBasePath, LOGS_CURRENT_TABLE_NAME + ".tmp");
            // move the current table aside first and sort from the moved copy
            cypress.move(currentLogs, detachedLogs, false, true);
            List<YtPath> sortInputs = new ArrayList<>();
            sortInputs.add(detachedLogs);
            if (cypress.exists(mergedLogs)) {
                sortInputs.add(mergedLogs);
            }
            cypress.waitFor(cypress.sort(sortInputs, mergedLogs, "started"));
            cypress.remove(detachedLogs);
            return true;
        });
    }

    /**
     * Identifies this task to the periodic task framework.
     *
     * @return {@link PeriodicTaskType#YT_TO_MDB_CLICKHOUSE_IMPORT}
     */
    @Override
    public PeriodicTaskType getType() {
        return PeriodicTaskType.YT_TO_MDB_CLICKHOUSE_IMPORT;
    }

    /**
     * Returns the cron-based schedule of this task.
     *
     * @return schedule built from the expression {@code 0 *}{@code /5 * * * *}
     */
    @Override
    public TaskSchedule getSchedule() {
        // NOTE(review): six-field expression, presumably "every five minutes at second 0" - confirm against TaskSchedule
        final String cronExpression = "0 */5 * * * *";
        return TaskSchedule.startByCron(cronExpression);
    }

    /**
     * Description of a single table import, stored as the {@code spec} attribute of a job node.
     * <p>
     * Two jobs are considered equal when {@code ytTable}, {@code chTable}, {@code shard} and {@code dc}
     * are equal; {@code id}, {@code started} and {@code done} do not take part in the comparison. The task
     * relies on this to recognise that a freshly built job describes an already queued or finished one.
     */
    @Getter
    @AllArgsConstructor
    @Builder(toBuilder = true)
    @EqualsAndHashCode(of = {"ytTable", "chTable", "shard", "dc"})
    public static class YtClickhouseImportJob {
        // Jackson type token for a list of jobs; not used inside this file.
        public static final TypeReference<List<YtClickhouseImportJob>> LIST_REFERENCE = new TypeReference<>() {
        };
        // Id of the import queue record this job belongs to.
        UUID id;
        // Moment the job description was created; the oldest value per host feeds the data-age sensor.
        DateTime started;
        // Source table in YT; its cluster determines which cluster tasks directory the job node goes to.
        YtPath ytTable;
        // Target Clickhouse table, qualified as "<database>.<table>".
        String chTable;
        // Taken from the table's chCreateSpec when the job is built.
        String chCreateSpec;
        // Taken from the record's chInsertSpec when the job is built.
        String chInsertSpec;
        // Optional data center name; when set, only hosts of this DC are considered for the job.
        String dc;
        // Optional shard; when null, hosts of all shards are considered for the job.
        Integer shard;
        // Finished flag; this task only reads it from the node spec (presumably set by the uploader - confirm).
        boolean done;
        // Compression of the transferred data; jobs created by this task always use GZIP.
        CompressionMode dataCompressionMode;
    }

    /**
     * State reported by the task for one run; a fresh instance is installed at the start of every run.
     */
    @Getter
    @Setter
    public static class TaskState implements PeriodicTaskState {
        // Jobs for which a node was created during this run.
        private List<YtClickhouseImportJob> newJobs = new ArrayList<>();
        // Jobs found finished during this run (their nodes were removed).
        private List<YtClickhouseImportJob> finishedJobs = new ArrayList<>();
    }
}
