package ru.yandex.webmaster3.monitoring.metrika;

import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.lang3.mutable.MutableLong;
import org.apache.commons.lang3.tuple.Pair;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Scheduled;
import ru.yandex.webmaster3.core.solomon.HandleCommonMetricsService;
import ru.yandex.webmaster3.core.solomon.Indicators;
import ru.yandex.webmaster3.core.solomon.SolomonSensor;
import ru.yandex.webmaster3.storage.host.CommonDataState;
import ru.yandex.webmaster3.storage.host.CommonDataType;
import ru.yandex.webmaster3.storage.metrika.dao.MetrikaCounterBindingStateYDao;
import ru.yandex.webmaster3.storage.metrika.dao.MetrikaCrawlStateData;
import ru.yandex.webmaster3.storage.metrika.dao.MetrikaCrawlStateYDao;
import ru.yandex.webmaster3.storage.metrika.data.MetrikaCounterCrawlStateEnum;
import ru.yandex.webmaster3.storage.settings.dao.CommonDataStateYDao;
import ru.yandex.webmaster3.storage.util.ydb.exception.WebmasterYdbException;
import ru.yandex.webmaster3.storage.util.yt.*;
import ru.yandex.webmaster3.storage.yql.YqlService;

import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.stream.Collectors;

import static java.util.stream.Collectors.toMap;

/**
 * @author leonidrom
 */
public class MetrikaUrlsCrawlMonitoringService {
    private static final Logger log = LoggerFactory.getLogger(MetrikaUrlsCrawlMonitoringService.class);
    // Name prefix of the result tables produced by the monitoring YQL query (see getYqlQuery)
    // and used to find them again in getAllMonitoringTables.
    private static final String CRAWL_MONITORING_TABLE_PREFIX = "crawl-monitoring-";
    // Result tables whose timestamp is older than this are removed by cleanupOldMonitoringTables.
    private static final Duration MAX_MONITORING_TABLE_AGE = Duration.standardDays(14);

    // Label values attached to the Solomon sensors built in push().
    private static final String SECTION_LABEL_VALUE = "metrika_crawl";
    // NOTE: used as a label *name* (not value) in push(): withLabel(STATE_LABEL_VALUE, state.toString()).
    private static final String STATE_LABEL_VALUE = "state";
    private static final String PROCESSED_TABLES_DATA_TYPE = "processed_tables";
    private static final String BAD_URLS_DATA_TYPE = "bad_urls";
    private static final String CRAWL_STATE_DATA_TYPE = "crawl_state";
    // Passed as the second argument to handleCommonMetricsService.handle();
    // presumably an estimated serialized sensor size — confirm against HandleCommonMetricsService.
    private static final int AVERAGE_SENSORS_SIZE = 600;
    // Number of approved (domain, counterId) bindings accumulated before flushBatch is called.
    private static final int BATCH_SIZE = 1000;
    // Alignment interval passed to SolomonSensor.createAligned; 1800 s = 30 min, the same period as the push() cron.
    private static final int REFRESH_INTERVAL_SECONDS = 1800;

    // Schema of the crawl settings table written to tmpTablePath; the YQL query joins on domain/counter_id
    // and reads crawl_enabled/updated. "updated" is written as seconds since epoch (millis / 1000).
    private static final YtSchema YT_TABLE_SCHEMA = new YtSchema();
    private static final YtColumn<String> DOMAIN = YT_TABLE_SCHEMA.addColumn("domain", YtColumn.Type.STRING);
    private static final YtColumn<Long> COUNTER_ID = YT_TABLE_SCHEMA.addColumn("counter_id", YtColumn.Type.INT_64);
    private static final YtColumn<Boolean> CRAWL_ENABLED = YT_TABLE_SCHEMA.addColumn("crawl_enabled", YtColumn.Type.BOOLEAN);
    private static final YtColumn<Long> UPDATED = YT_TABLE_SCHEMA.addColumn("updated", YtColumn.Type.INT_64);

    // Sink for the Solomon sensors collected in push().
    private final HandleCommonMetricsService handleCommonMetricsService;
    // Source of counter bindings iterated in uploadMetrikaCrawlSettings.
    private final MetrikaCounterBindingStateYDao metrikaCounterBindingStateYDao;
    // Source of per-counter crawl states looked up in getStatesForBatch.
    private final MetrikaCrawlStateYDao metrikaCrawlStateYDao;
    // Stores the name of the last processed Robot table between runs.
    private final CommonDataStateYDao commonDataStateYDao;

    private final YtService ytService;
    private final YqlService yqlService;
    // Table that receives the crawl settings snapshot; it is removed and recreated on each upload.
    private final YtPath tmpTablePath;
    // Directory that is listed to discover Robot tables to process.
    private final YtPath robotTablesDir;
    // Directory where result tables are created and later cleaned up.
    private final YtPath resultTablesDir;

    // Daemon threads, so the pool created below does not keep the JVM alive.
    private final ThreadFactory threadFactory = new ThreadFactoryBuilder()
            .setDaemon(true)
            .build();
    // Fixed pool of 32 threads used by getStatesForBatch to run crawl state lookups in parallel.
    private final ExecutorService executorService = Executors.newFixedThreadPool(32, threadFactory);

    /**
     * Wires the service with its metric sink, storage DAOs and YT/YQL access.
     *
     * @param handleCommonMetricsService     receiver of the Solomon sensors produced by this service
     * @param metrikaCounterBindingStateYDao DAO used to iterate and look up counter bindings
     * @param metrikaCrawlStateYDao          DAO used to look up per-counter crawl state
     * @param commonDataStateYDao            DAO that keeps the name of the last processed Robot table
     * @param ytService                      YT access
     * @param yqlService                     YQL access
     * @param tmpTablePath                   YT table that receives the crawl settings snapshot
     * @param robotTablesDir                 YT directory that is listed for Robot tables
     * @param resultTablesDir                YT directory where result tables are created and cleaned up
     */
    @Autowired
    public MetrikaUrlsCrawlMonitoringService(
            HandleCommonMetricsService handleCommonMetricsService,
            MetrikaCounterBindingStateYDao metrikaCounterBindingStateYDao,
            MetrikaCrawlStateYDao metrikaCrawlStateYDao,
            CommonDataStateYDao commonDataStateYDao,
            YtService ytService,
            YqlService yqlService,
            @Value("${webmaster3.monitoring.metrika.urlsCrawl.tmpTablePath}") YtPath tmpTablePath,
            @Value("${webmaster3.monitoring.metrika.urlsCrawl.robotTablesDir}") YtPath robotTablesDir,
            @Value("${webmaster3.monitoring.metrika.urlsCrawl.resultTablesDir}") YtPath resultTablesDir) {
        // YT / YQL access and table locations
        this.ytService = ytService;
        this.yqlService = yqlService;
        this.tmpTablePath = tmpTablePath;
        this.robotTablesDir = robotTablesDir;
        this.resultTablesDir = resultTablesDir;

        // storage
        this.metrikaCounterBindingStateYDao = metrikaCounterBindingStateYDao;
        this.metrikaCrawlStateYDao = metrikaCrawlStateYDao;
        this.commonDataStateYDao = commonDataStateYDao;

        // metrics
        this.handleCommonMetricsService = handleCommonMetricsService;
    }

    /**
     * Scheduled entry point (every 30 minutes): processes Robot tables that appeared since the last run
     * and pushes the resulting sensors.
     * <p>
     * Steps:
     * <ol>
     *     <li>find Robot tables newer than the last processed one; stop if there are none;</li>
     *     <li>upload the current crawl settings snapshot to YT, collecting per-state counter counts;</li>
     *     <li>run the YQL query that counts "bad" urls in the new Robot tables;</li>
     *     <li>remember the last processed table name;</li>
     *     <li>send sensors: counters per crawl state, bad urls count, age of the last processed table;</li>
     *     <li>remove expired monitoring result tables.</li>
     * </ol>
     */
    @Scheduled(cron = "0 0/30 * * * *")
    private void push() {
        List<YtPath> robotTables = getRobotTablesToProcess();
        if (robotTables.isEmpty()) {
            log.info("No new Robot tables to process");
            return;
        }
        // List#toString renders the same "[a, b, ...]" text as Arrays.toString(list.toArray())
        log.info("Robot tables to process: {}", robotTables);

        EnumMap<MetrikaCounterCrawlStateEnum, Long> counterStateCounts = new EnumMap<>(MetrikaCounterCrawlStateEnum.class);
        uploadMetrikaCrawlSettings(counterStateCounts);

        long now = System.currentTimeMillis();
        List<SolomonSensor> sensors = new ArrayList<>();
        // one sensor per enum value, so states without counters are reported as 0
        for (var state : MetrikaCounterCrawlStateEnum.values()) {
            long count = counterStateCounts.getOrDefault(state, 0L);
            log.info("State: {}, count: {}", state, count);
            sensors.add(SolomonSensor.createAligned(now, REFRESH_INTERVAL_SECONDS, count)
                    .withLabel(SolomonSensor.LABEL_SECTION, SECTION_LABEL_VALUE)
                    .withLabel(STATE_LABEL_VALUE, state.toString())
                    .withLabel(SolomonSensor.LABEL_INDICATOR, Indicators.COUNT)
                    .withLabel(SolomonSensor.LABEL_DATA_TYPE, CRAWL_STATE_DATA_TYPE));
        }

        long totalBadUrls = getBadUrlsCount(robotTables);
        log.info("Total bad urls: {}", totalBadUrls);
        sensors.add(SolomonSensor.createAligned(now, REFRESH_INTERVAL_SECONDS, totalBadUrls)
                .withLabel(SolomonSensor.LABEL_SECTION, SECTION_LABEL_VALUE)
                .withLabel(SolomonSensor.LABEL_INDICATOR, Indicators.COUNT)
                .withLabel(SolomonSensor.LABEL_DATA_TYPE, BAD_URLS_DATA_TYPE));

        // robotTables is sorted by name, so the last element is the newest processed table
        String lastTable = robotTables.get(robotTables.size() - 1).getName();
        commonDataStateYDao.update(new CommonDataState(
                CommonDataType.LAST_ROBOT_CRAWL_WATCHDOG_TABLE_PROCESSED, lastTable, DateTime.now()));

        // the table name is parsed as a timestamp; the sensor value is its age in seconds
        long lastTableAgeSeconds = (now - DateTime.parse(lastTable).getMillis()) / 1000L;
        sensors.add(SolomonSensor.createAligned(now, REFRESH_INTERVAL_SECONDS, lastTableAgeSeconds)
                .withLabel(SolomonSensor.LABEL_SECTION, SECTION_LABEL_VALUE)
                .withLabel(SolomonSensor.LABEL_INDICATOR, Indicators.DATA_AGE)
                .withLabel(SolomonSensor.LABEL_DATA_TYPE, PROCESSED_TABLES_DATA_TYPE));

        handleCommonMetricsService.handle(sensors, AVERAGE_SENSORS_SIZE);

        try {
            cleanupOldMonitoringTables();
        } catch (InterruptedException e) {
            // restore the interrupt flag so the scheduler thread can observe the interruption
            Thread.currentThread().interrupt();
            log.error("Error cleaning old tables", e);
        }
    }

    /**
     * Lists Robot tables that have not been processed yet.
     * <p>
     * A table is considered new when its name is lexicographically greater than the stored name of the
     * last processed table; when nothing is stored, all tables in {@code robotTablesDir} are returned.
     *
     * @return new tables sorted by name in ascending order (possibly empty)
     */
    private List<YtPath> getRobotTablesToProcess() {
        CommonDataState lastProcessedState =
                commonDataStateYDao.getValue(CommonDataType.LAST_ROBOT_CRAWL_WATCHDOG_TABLE_PROCESSED);
        String lastProcessedName = lastProcessedState != null ? lastProcessedState.getValue() : null;
        log.info("Last table processed: {}", lastProcessedName);

        List<YtPath> newTables = new ArrayList<>();
        ytService.inTransaction(robotTablesDir).execute(cypressService -> {
            for (YtPath table : cypressService.list(robotTablesDir)) {
                boolean isNew = lastProcessedName == null
                        || table.getName().compareTo(lastProcessedName) > 0;
                if (isNew) {
                    newTables.add(table);
                }
            }
            return true;
        });

        newTables.sort(Comparator.comparing(YtPath::getName));
        return newTables;
    }

    /**
     * Uploads the current state of the Metrika counter crawl settings to YT.
     * <p>
     * Approved bindings are accumulated into batches of {@code BATCH_SIZE} and written by
     * {@link #flushBatch}, which also updates {@code counterStateCounts}. Non-approved bindings
     * are written directly with {@code crawl_enabled = false}, unless their adjusted update date
     * is more than 30 days in the past. The prepared data then replaces the table at {@code tmpTablePath}.
     *
     * @param counterStateCounts accumulator of counters per crawl state, filled for approved bindings only
     */
    private void uploadMetrikaCrawlSettings(EnumMap<MetrikaCounterCrawlStateEnum, Long> counterStateCounts) {
        log.info("Started uploading metrika-crawl-settings table");

        DateTime uploadStart = DateTime.now();
        List<Pair<String, Long>> approvedBatch = new ArrayList<>();
        var tableData = ytService.prepareTableData("metrika-crawl-settings", tableWriter -> {
            metrikaCounterBindingStateYDao.forEachLink(binding -> {
                String domain = binding.getDomain();
                long counterId = binding.getCounterId();

                if (binding.getCounterBindingState().isApproved()) {
                    approvedBatch.add(Pair.of(domain, counterId));
                    if (approvedBatch.size() == BATCH_SIZE) {
                        flushBatch(tableWriter, approvedBatch, counterStateCounts);
                        approvedBatch.clear();
                    }
                    return;
                }

                // Counters without an approved binding are uploaded as well. They are needed to filter out
                // legitimate cases where a binding stopped being valid and the Robot has not yet stopped
                // crawling its urls.
                try {
                    DateTime adjustedUpdateDate = adjustUpdateDate(binding.getUpdateDate());
                    boolean tooOld = adjustedUpdateDate.plusDays(30).isBefore(uploadStart);
                    if (tooOld) {
                        // keeps the upload from growing too large
                        return;
                    }

                    DOMAIN.set(tableWriter, domain);
                    COUNTER_ID.set(tableWriter, counterId);
                    CRAWL_ENABLED.set(tableWriter, false);
                    // NOTE(review): the age check above uses the adjusted date, while the raw update date
                    // is written here (flushBatch writes the adjusted one) — confirm this asymmetry is intended.
                    UPDATED.set(tableWriter, binding.getUpdateDate().getMillis() / 1000L);

                    tableWriter.rowEnd();
                } catch (YtException e) {
                    log.error("Error preparing Yt table data", e);
                    throw new RuntimeException(e);
                }
            }, false);

            // write the last, incomplete batch of approved bindings
            if (!approvedBatch.isEmpty()) {
                flushBatch(tableWriter, approvedBatch, counterStateCounts);
            }
        });

        ytService.inTransaction(tmpTablePath).execute(cypressService -> {
            YtNodeAttributes tableAttributes = new YtNodeAttributes().setSchema(YT_TABLE_SCHEMA);
            boolean tableExists = cypressService.exists(tmpTablePath);
            if (tableExists) {
                cypressService.remove(tmpTablePath);
            }

            // recreate the table from scratch with the expected schema, then fill it
            cypressService.create(tmpTablePath, YtNode.NodeType.TABLE, true, tableAttributes);
            cypressService.writeTable(tmpTablePath, tableData);

            return true;
        });

        log.info("Finished uploading metrika-crawl-settings table");
    }

    /**
     * Writes one row per (domain, counterId) pair of the batch and updates the per-state counters.
     * <p>
     * Crawl states for the whole batch are fetched via {@link #getStatesForBatch}. A pair without a stored
     * state is treated as {@code DISABLED}; its "updated" value is then taken from the binding state,
     * or is epoch 0 when the binding is not found either.
     *
     * @param tableWriter        writer the rows are appended to
     * @param batch              (domain, counterId) pairs to write; not modified here
     * @param counterStateCounts accumulator incremented once per written row, keyed by the row's crawl state
     * @throws RuntimeException wrapping a {@code YtException} raised while writing a row
     */
    private void flushBatch(TableWriter tableWriter, List<Pair<String, Long>> batch,
                            EnumMap<MetrikaCounterCrawlStateEnum, Long> counterStateCounts) {
        List<MetrikaCrawlStateData> states = getStatesForBatch(batch);
        Map<Pair<String, Long>, MetrikaCrawlStateData> counterToState = states.stream()
                // a missing state may come back as null; such pairs are handled as "no state" below
                .filter(Objects::nonNull)
                .collect(toMap(
                        data -> Pair.of(data.getDomain(), data.getCounterId()),
                        data -> data,
                        // a repeated (domain, counterId) must not fail the whole upload
                        (first, second) -> first));

        for (Pair<String, Long> pair : batch) {
            MetrikaCrawlStateData data = counterToState.get(pair);
            MetrikaCounterCrawlStateEnum counterState =
                    data == null ? MetrikaCounterCrawlStateEnum.DISABLED : data.getState();
            try {
                String domain = pair.getLeft();
                long counterId = pair.getRight();
                DOMAIN.set(tableWriter, domain);
                COUNTER_ID.set(tableWriter, counterId);
                CRAWL_ENABLED.set(tableWriter, counterState == MetrikaCounterCrawlStateEnum.ENABLED);

                DateTime updateDate = null;
                if (data != null) {
                    updateDate = data.getUpdateDate();
                } else {
                    // there is no crawl state record for this counter in the database,
                    // so fall back to the date the binding state was changed
                    var bindingState = metrikaCounterBindingStateYDao.get(domain, counterId);
                    if (bindingState != null) {
                        updateDate = bindingState.getUpdateDate();
                    }
                }

                updateDate = updateDate == null ? new DateTime(0) : adjustUpdateDate(updateDate);
                UPDATED.set(tableWriter, updateDate.getMillis() / 1000L);

                tableWriter.rowEnd();
                counterStateCounts.compute(counterState, (k, v) -> v == null ? 1 : v + 1);
            } catch (YtException e) {
                log.error("Error preparing Yt table data", e);
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Moves the given date to the start of the following day.
     * <p>
     * Crawl settings are shipped to YT for the Robot by the UPLOAD_METRIKA_CRAWL_SETTINGS task,
     * which starts every day at 00:00, hence this date manipulation.
     *
     * @param date moment of the settings change
     * @return start of the day after {@code date}
     */
    private static DateTime adjustUpdateDate(DateTime date) {
        DateTime startOfSameDay = date.withTimeAtStartOfDay();
        return startOfSameDay.plusDays(1);
    }

    /**
     * Builds the YQL script that finds "bad" urls in the given Robot tables.
     * <p>
     * The script left-joins the Robot urls with the crawl settings table at {@code tmpTablePath} by
     * (domain, counter id). A url is "bad" when no settings row matched, or when crawl is not enabled and
     * the url does not fall into the grace window after the settings update. Bad urls are written to a
     * result table named {@code crawl-monitoring-<first table>_<last table>} in {@code resultTablesDir},
     * and their total is returned in the {@code cnt} column.
     * <p>
     * NOTE(review): rows with {@code crawl_enabled = true} are excluded from {@code $bad_urls}, so
     * {@code IsCrawlBeforeEnabled} looks like it can never be true — confirm.
     *
     * @param robotTables Robot tables to scan; must be non-empty
     * @return the script text
     */
    private String getYqlQuery(List<YtPath> robotTables) {
        // 172800 here is two days: the time the Robot needs to stop crawling urls
        // after crawling by the counter has been switched off
        String queryTemplate =
                "PRAGMA yt.QueryCacheMode=\"disable\";\n" +
                "use arnold;\n" +
                "$robot_urls = (\n" +
                "SELECT Url, OriginalUrl, Url::CutWWW(Url::HostNameToPunycode(Url::GetHost(Url::NormalizeWithDefaultHttpScheme(Url::CutQueryStringAndFragment(OriginalUrl))))) as Domain, " +
                "LastValidWatchLogCounterId, IsValidOnlyByWatchLog, LastAccess " +
                "FROM concat(%s)\n" +
                ");\n" +
                "$robot_urls_joined = (\n" +
                "SELECT t1.*, t2.crawl_enabled as crawl_enabled, t2.updated as updated FROM $robot_urls as t1 " +
                "LEFT JOIN `%s` as t2 " +
                "on t1.Domain = t2.domain and t1.LastValidWatchLogCounterId = t2.counter_id\n" +
                ");\n" +
                "$bad_urls = (\n" +
                "SELECT * FROM $robot_urls_joined WHERE crawl_enabled is null " +
                "or not (crawl_enabled = true or (crawl_enabled == false and LastAccess - updated < 172800))\n" +
                ");\n" +
                "INSERT INTO `%s` WITH TRUNCATE \n" +
                "SELECT Url, OriginalUrl, LastValidWatchLogCounterId, IsValidOnlyByWatchLog, LastAccess, " +
                "if(crawl_enabled is NULL, true, false) as IsUnknownCounter, " +
                "if (crawl_enabled == true, if (LastAccess < updated, true, false), false) as IsCrawlBeforeEnabled, " +
                "if (crawl_enabled == false, if (LastAccess - updated > 172800, true, false), false) as IsCrawlDisabled " +
                "FROM $bad_urls;\n" +
                "COMMIT;\n" +
                "SELECT count(*) as cnt FROM $bad_urls;\n";

        // comma-separated, single-quoted list of the source tables for concat(...)
        String quotedSourceTables = robotTables.stream()
                .map(table -> "'" + table.toYtPath() + "'")
                .collect(Collectors.joining(","));

        // the result table name encodes the processed range: <prefix><first>_<last>
        YtPath firstTable = robotTables.get(0);
        YtPath lastTable = robotTables.get(robotTables.size() - 1);
        String resultTableName = CRAWL_MONITORING_TABLE_PREFIX + firstTable.getName() + "_" + lastTable.getName();
        YtPath resultTablePath = YtPath.path(resultTablesDir, resultTableName);

        String yql = String.format(
                queryTemplate,
                quotedSourceTables,
                tmpTablePath.toYtPath(),
                resultTablePath.toYtPath());
        log.info("YQL query: {}", yql);

        return yql;
    }

    /**
     * Runs the monitoring YQL script for the given Robot tables and returns the number of bad urls.
     *
     * @param robotTables Robot tables to scan
     * @return value of the {@code cnt} column delivered by the query; 0 if the query delivers no rows
     */
    private long getBadUrlsCount(List<YtPath> robotTables) {
        log.info("Started YQL script");
        String script = getYqlQuery(robotTables);
        // mutable holder, because the result is delivered through a callback
        MutableLong badUrls = new MutableLong();
        yqlService.query(script, rs -> rs.getLong("cnt"), badUrls::setValue);
        log.info("Finished YQL script");

        return badUrls.getValue();
    }

    /**
     * Removes monitoring result tables whose timestamp is older than {@code MAX_MONITORING_TABLE_AGE}.
     * Each expired table is removed in its own non-transactional call.
     */
    private void cleanupOldMonitoringTables() throws InterruptedException, YtException {
        for (YtPath tablePath : getAllMonitoringTables()) {
            DateTime expiresAt = getTableTS(tablePath).plus(MAX_MONITORING_TABLE_AGE);
            if (!expiresAt.isBeforeNow()) {
                // still within the retention period
                continue;
            }

            ytService.withoutTransaction(cypressService -> {
                cypressService.remove(tablePath);
                return true;
            });
        }
    }

    /**
     * Extracts the timestamp of a monitoring table from its name.
     * The part of the name after the last {@code '_'} separator is parsed as a date-time.
     *
     * @param tablePath path of the monitoring table
     * @return the parsed timestamp
     */
    private static DateTime getTableTS(YtPath tablePath) {
        String[] nameParts = tablePath.getName().split("_");
        String timestampPart = nameParts[nameParts.length - 1];
        return DateTime.parse(timestampPart);
    }

    /**
     * Lists the tables in {@code resultTablesDir} whose name starts with the monitoring table prefix.
     *
     * @return matching tables in the order returned by the listing
     */
    private List<YtPath> getAllMonitoringTables() throws YtException, InterruptedException {
        return ytService.withoutTransactionQuery(cypressService -> {
            List<YtPath> monitoringTables = new ArrayList<>();
            for (YtPath table : cypressService.list(resultTablesDir)) {
                if (table.getName().startsWith(CRAWL_MONITORING_TABLE_PREFIX)) {
                    monitoringTables.add(table);
                }
            }
            return monitoringTables;
        });
    }

    /**
     * Loads crawl states for all (domain, counterId) pairs of the batch in parallel.
     * <p>
     * One lookup per pair is submitted to {@code executorService}; results are collected in the order
     * of {@code batch}. An element of the result is whatever {@code metrikaCrawlStateYDao.getState}
     * returned for the pair, so callers should be prepared for {@code null} elements.
     *
     * @param batch (domain, counterId) pairs to look up
     * @return lookup results, one per pair, in batch order
     * @throws WebmasterYdbException if any lookup fails or the calling thread is interrupted while waiting
     */
    private List<MetrikaCrawlStateData> getStatesForBatch(List<Pair<String, Long>> batch) {
        List<Future<MetrikaCrawlStateData>> futures = new ArrayList<>(batch.size());
        for (var p : batch) {
            String domain = p.getLeft();
            long counterId = p.getRight();
            var f = executorService.submit(() -> metrikaCrawlStateYDao.getState(domain, counterId));
            futures.add(f);
        }

        List<MetrikaCrawlStateData> res = new ArrayList<>(futures.size());
        boolean completed = false;
        try {
            for (var f : futures) {
                res.add(f.get());
            }
            completed = true;
            return res;
        } catch (InterruptedException e) {
            // keep the interruption visible to the caller's thread instead of swallowing it
            Thread.currentThread().interrupt();
            throw new WebmasterYdbException("Interrupted while reading from YDB", e);
        } catch (Exception e) {
            throw new WebmasterYdbException("Failed to read from YDB", e);
        } finally {
            if (!completed) {
                // the batch has failed as a whole: drop lookups that have not started yet
                // (already running ones are left to finish; cancel on a finished future is a no-op)
                for (var pending : futures) {
                    pending.cancel(false);
                }
            }
        }
    }
}
