package ru.yandex.webmaster3.worker.addurl;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.collect.Range;
import lombok.Data;
import lombok.RequiredArgsConstructor;
import org.jetbrains.annotations.NotNull;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import ru.yandex.webmaster3.core.WebmasterException;
import ru.yandex.webmaster3.core.addurl.RecrawlState;
import ru.yandex.webmaster3.core.addurl.UrlForRecrawl;
import ru.yandex.webmaster3.core.data.WebmasterHostId;
import ru.yandex.webmaster3.core.delurl.DelUrlRequest;
import ru.yandex.webmaster3.core.delurl.DelurlState;
import ru.yandex.webmaster3.core.http.WebmasterErrorResponse;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskState;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.core.worker.task.TaskResult;
import ru.yandex.webmaster3.storage.abt.AbtService;
import ru.yandex.webmaster3.storage.delurl.DelUrlRequestsService;
import ru.yandex.webmaster3.storage.host.CommonDataState;
import ru.yandex.webmaster3.storage.host.CommonDataType;
import ru.yandex.webmaster3.storage.settings.dao.CommonDataStateYDao;
import ru.yandex.webmaster3.storage.util.clickhouse2.ClickhouseException;
import ru.yandex.webmaster3.storage.util.ydb.exception.WebmasterYdbException;
import ru.yandex.webmaster3.storage.util.yt.*;
import ru.yandex.webmaster3.worker.PeriodicTask;
import ru.yandex.webmaster3.worker.TaskSchedule;
import ru.yandex.wmtools.common.util.http.YandexHttpStatus;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_SINGLE_QUOTES;

/**
 * @author tsyplyaev
 * @author aherman
 * @author ishalaru
 * @update 05.07.2019
 */
@RequiredArgsConstructor(onConstructor_ = @Autowired)
public class UpdateUrlStatePeriodicTask extends PeriodicTask<UpdateUrlStatePeriodicTask.TaskState> {
    private static final Logger log = LoggerFactory.getLogger(UpdateUrlStatePeriodicTask.class);

    // Age threshold for DelUrl requests: run-time "now" minus this duration is passed to
    // computeNewState as resendIfOlderThan.
    private static final Duration RESEND_DEL_URL_AFTER = Duration.standardHours(3);

    // Tables older than 3 days are not checked; used as the default lower bound for table dates
    // when no last-processed timestamp is stored.
    private static final Duration MAX_TABLE_AGE = Duration.standardDays(3);
    // Name patterns of result tables; group 1 is parsed by getTableDate as a timestamp in seconds.
    private static final Pattern RESULT_TABLE_NAME_PATTERN = Pattern.compile("recrawl_result.(\\d+)");
    private static final Pattern HOOK_RESULT_TABLE_NAME_PATTERN = Pattern.compile("recrawl_hook_result.(\\d+)");
    // Single quotes are allowed so that the schema literal below can be written without escaping.
    private static final ObjectMapper OM = new ObjectMapper().configure(ALLOW_SINGLE_QUOTES, true);
    // Value of the "schema" attribute set on a merged table when it is created.
    private static final String MERGED_TABLE_SCHEMA = "[" +
            "{'name': 'url', 'type': 'string', 'sort_order': 'ascending'}, " +
            "{'name': 'ts', 'type': 'uint64', 'sort_order': 'ascending'}, " +
            "{'name': 'code', 'type': 'uint64'}, " +
            "{'name': 'success', 'type': 'boolean'}]";
    // Maximum number of tables processed in a single pass.
    private static final int MAX_TABLES_PER_TASK_RUN = 50;
    // Statuses that checkDelUrlCode accepts for a successful recrawl result.
    private static final Set<YandexHttpStatus> DEL_URL_CORRECT_STATUSES =
            Set.of(YandexHttpStatus.HTTP_403_FORBIDDEN, YandexHttpStatus.HTTP_404_NOT_FOUND,
                    YandexHttpStatus.HTTP_410_GONE, YandexHttpStatus.HTTP_1003_ROBOTS_TXT_DISALLOW, YandexHttpStatus.EXT_HTTP_2005_NOINDEX);


    // Final collaborators, supplied through the Lombok-generated @Autowired constructor.
    private final DelUrlRequestsService delurlRequestsService;
    private final YtService ytService;
    private final CommonDataStateYDao commonDataStateYDao;
    private final AbtService abtService;

    // Configuration assigned through the setters of this class.
    // Directory that is listed for result tables and used as the transaction root in run().
    private YtPath resultsDir;
    // Destination tables into which old result / hook result tables are merged.
    private YtPath mergedResultsTable;
    private YtPath mergedHookResultsTable;
    // true: merged source tables are removed; false: they are moved to removedResultTablesDir.
    private boolean removeResultTables;
    private YtPath removedResultTablesDir;

    /**
     * Executes one pass of the task inside a YT transaction on {@code resultsDir}:
     * <ol>
     *     <li>finds result tables newer than the stored last-processed timestamp;</li>
     *     <li>loads them, keeps the latest result per host/URL and updates DelUrl request states;</li>
     *     <li>stores the date of the last processed table;</li>
     *     <li>merges old result and hook result tables into the merged tables.</li>
     * </ol>
     *
     * @param runId identifier of this run (not used by the method body)
     * @return a {@link TaskResult#SUCCESS} result when the transaction completes
     * @throws WebmasterException wrapping YT, IO, Clickhouse or YDB failures
     */
    @Override
    public Result run(UUID runId) {
        TaskState state = new TaskState();
        setState(state);

        try {
            ytService.inTransaction(resultsDir).execute(cypressService -> {
                try {
                    // Lower bound for table dates: the stored timestamp of the last processed table,
                    // or "now minus MAX_TABLE_AGE" when nothing has been stored yet.
                    DateTime lowerBound = Optional
                            .ofNullable(commonDataStateYDao.getValue(CommonDataType.ADDURL_LAST_STATE_UPDATE))
                            .map(CommonDataState::getValue)
                            .map(Long::parseLong)
                            .map(DateTime::new)
                            .orElse(DateTime.now().minus(MAX_TABLE_AGE));

                    List<YtPath> freshTables = findResultTables(cypressService, lowerBound);
                    if (!freshTables.isEmpty()) {
                        String tableNames = freshTables.stream()
                                .map(YtPath::getName)
                                .collect(Collectors.joining(", "));
                        log.info("Processing tables {}", tableNames);

                        List<RecrawlResult> loaded = loadResults(cypressService, freshTables);
                        state.newResults += loaded.size();

                        // apply the newest result per host/URL to the pending DelUrl requests
                        updateDelUrlStates(state, latestRecrawlResult(loaded));

                        // remember the date of the last table in the (sorted) list
                        YtPath newestTable = freshTables.get(freshTables.size() - 1);
                        long newestMillis = getTableDate(newestTable, RESULT_TABLE_NAME_PATTERN).getMillis();
                        commonDataStateYDao.update(new CommonDataState(
                                CommonDataType.ADDURL_LAST_STATE_UPDATE,
                                String.valueOf(newestMillis),
                                DateTime.now()));
                    }

                    // old tables are not simply deleted: they are merged into the common tables first
                    // NOTE(review): merging covers every table older than 6 hours, while processing above is
                    // capped at MAX_TABLES_PER_TASK_RUN per run — confirm a backlog cannot lose unprocessed tables.
                    mergeAndRemoveResultsTable(cypressService, mergedResultsTable, RESULT_TABLE_NAME_PATTERN);
                    mergeAndRemoveResultsTable(cypressService, mergedHookResultsTable, HOOK_RESULT_TABLE_NAME_PATTERN);
                    return true;
                } catch (IOException e) {
                    throw new WebmasterException("IO error",
                            new WebmasterErrorResponse.InternalUnknownErrorResponse(getClass(), "IO error"), e);
                } catch (ClickhouseException e) {
                    throw new WebmasterException("Clickhouse error",
                            new WebmasterErrorResponse.ClickhouseErrorResponse(getClass(), "Clickhouse error", e), e);
                } catch (WebmasterYdbException e) {
                    throw new WebmasterException("Ydb error",
                            new WebmasterErrorResponse.YDBErrorResponse(getClass(), e), e);
                }
            });
        } catch (YtException e) {
            throw new WebmasterException("YT error",
                    new WebmasterErrorResponse.YTServiceErrorResponse(getClass(), e), e);
        }
        return new Result(TaskResult.SUCCESS);
    }

    /**
     * Extracts the date encoded in a result table name.
     * <p>
     * The first capturing group of {@code pattern} is parsed as a number of seconds and converted
     * to milliseconds before being turned into a {@link DateTime}.
     *
     * @param lastTable table whose name carries the timestamp
     * @param pattern   pattern the whole table name must match; group 1 must be the timestamp in seconds
     * @return date built from the timestamp in the table name
     * @throws IllegalStateException if the table name does not match {@code pattern}
     */
    @NotNull
    private static DateTime getTableDate(YtPath lastTable, Pattern pattern) {
        Matcher matcher = pattern.matcher(lastTable.getName());
        // name the offending table: a bare IllegalStateException gives no clue which node is wrong
        Preconditions.checkState(matcher.matches(),
                "Table name '%s' does not match pattern '%s'", lastTable.getName(), pattern);
        return new DateTime(TimeUnit.SECONDS.toMillis(Long.parseLong(matcher.group(1))));
    }

    /**
     * Lists not yet processed result tables in {@code resultsDir}.
     * <p>
     * A table qualifies when its name matches {@code RESULT_TABLE_NAME_PATTERN} and the date encoded
     * in the name is strictly after {@code minTableDate}. The result is sorted in the natural order of
     * {@link YtPath} and truncated to {@code MAX_TABLES_PER_TASK_RUN} entries.
     *
     * @param cypressService service used to list the directory
     * @param minTableDate   exclusive lower bound for the table date
     * @return matching tables, or an empty list when there are no new results
     */
    private List<YtPath> findResultTables(YtCypressService cypressService, DateTime minTableDate) throws YtException {
        return cypressService.list(resultsDir).stream()
                .filter(candidate ->
                        // the name check must come first: getTableDate fails on non-matching names
                        RESULT_TABLE_NAME_PATTERN.matcher(candidate.getName()).matches()
                                && getTableDate(candidate, RESULT_TABLE_NAME_PATTERN).isAfter(minTableDate))
                .sorted()
                .limit(MAX_TABLES_PER_TASK_RUN)
                .collect(Collectors.toList());
    }

    /**
     * Reads every row of the given YT tables into memory.
     * <p>
     * Tables are read one after another through an {@link AsyncTableReader} (5 retries) backed by a
     * single-thread executor that is shut down when reading finishes or fails.
     *
     * @param cypressService service the readers are created on
     * @param resultTables   tables to read, in the order they should be appended
     * @return all rows of all tables, in reading order
     */
    private List<RecrawlResult> loadResults(YtCypressService cypressService, List<YtPath> resultTables)
            throws YtException, IOException, InterruptedException {
        List<RecrawlResult> rows = new ArrayList<>();
        ExecutorService readerExecutor = Executors.newSingleThreadExecutor();
        try {
            for (YtPath table : resultTables) {
                AsyncTableReader<RecrawlResult> reader = new AsyncTableReader<>(cypressService, table,
                        Range.all(), new RecrawlResult.YtResultRowMapper()).withRetry(5)
                        .inExecutor(readerExecutor, "recrawl-results-cacher");
                // the iterator is a resource and has to be closed after each table
                try (AsyncTableReader.TableIterator<RecrawlResult> rowIterator = reader.read()) {
                    for (; rowIterator.hasNext(); ) {
                        rows.add(rowIterator.next());
                    }
                }
            }
        } finally {
            readerExecutor.shutdown();
        }
        return rows;
    }

    /**
     * Merges old result tables into {@code mergedResultsTable} and then disposes of the sources.
     * <p>
     * The merged table is created with {@code MERGED_TABLE_SCHEMA} when it does not exist yet. Tables in
     * {@code resultsDir} whose name matches {@code pattern} and whose date is more than 6 hours in the
     * past are sorted by {@code url, ts} together with the current merged table into the merged table.
     * Afterwards each source table is removed, or moved to {@code removedResultTablesDir} when
     * {@code removeResultTables} is {@code false}.
     *
     * @param cypressService     service all YT operations are issued through
     * @param mergedResultsTable destination table of the merge
     * @param pattern            name pattern of the source tables; group 1 is the timestamp in seconds
     * @throws WebmasterException if waiting for the sort operation reports failure
     */
    private void mergeAndRemoveResultsTable(YtCypressService cypressService, YtPath mergedResultsTable, Pattern pattern)
            throws YtException, IOException {
        boolean mergedTableExists = cypressService.exists(mergedResultsTable);
        if (!mergedTableExists) {
            YtNodeAttributes schemaAttributes = new YtNodeAttributes();
            schemaAttributes.getAttributes().put("schema", OM.readTree(MERGED_TABLE_SCHEMA));
            cypressService.create(mergedResultsTable, YtNode.NodeType.TABLE, true, schemaAttributes);
        }

        // only tables older than 6 hours are merged
        DateTime mergeThreshold = DateTime.now().minusHours(6);
        List<YtPath> staleTables = cypressService.list(resultsDir).stream()
                .filter(table -> pattern.matcher(table.getName()).matches())
                .filter(table -> getTableDate(table, pattern).isBefore(mergeThreshold))
                .collect(Collectors.toList());
        if (staleTables.isEmpty()) {
            return;
        }

        // pour the data into the common table: sources first, the merged table itself last
        List<YtPath> sortInputs = new ArrayList<>(staleTables);
        sortInputs.add(mergedResultsTable);
        YtOperationId sortOperation = cypressService.sort(sortInputs, mergedResultsTable, "url", "ts");
        boolean sortCompleted = cypressService.waitFor(sortOperation);
        if (!sortCompleted) {
            throw new WebmasterException("Failed to wait for merge completion. OperationId = " + sortOperation,
                    new WebmasterErrorResponse.YTServiceErrorResponse(getClass(), null));
        }

        for (YtPath staleTable : staleTables) {
            if (!removeResultTables) {
                // keep the table, just move it out of the results directory
                cypressService.move(staleTable, YtPath.path(removedResultTablesDir, staleTable.getName()), true);
            } else {
                // drop the already merged table
                cypressService.remove(staleTable);
            }
        }
    }


    /**
     * Groups recrawl results by host and relative URL, keeping a single result per URL.
     * <p>
     * When several results share the same host and URL they are combined with
     * {@code RecrawlResult::latest}.
     *
     * @param newResults results to group
     * @return host id -> (relative URL -> combined result)
     */
    private Map<WebmasterHostId, Map<String, RecrawlResult>> latestRecrawlResult(List<RecrawlResult> newResults) {
        Map<WebmasterHostId, Map<String, RecrawlResult>> byHost = new HashMap<>();
        for (RecrawlResult candidate : newResults) {
            Map<String, RecrawlResult> byUrl =
                    byHost.computeIfAbsent(candidate.getHostId(), ignored -> new HashMap<>());
            byUrl.merge(candidate.getRelativeUrl(), candidate, RecrawlResult::latest);
        }
        return byHost;
    }

    /**
     * Recomputes the state of pending DelUrl requests from the latest recrawl results and saves the changes.
     * <p>
     * Requests listed between "now minus {@code DelUrlRequest.STALE_REQUEST_AGE}" and "now" are examined.
     * Hosts in the {@code DELURL_SAMOVAR} experiment and requests that are neither {@code NEW} nor
     * {@code IN_PROGRESS} are skipped. For the rest a new state is computed; when it is not {@code null}
     * the updated request is collected and the whole batch is written at the end.
     *
     * @param taskState            counters of the current run, updated in place
     * @param latestRecrawlResults host id -> (relative URL -> latest recrawl result)
     */
    private void updateDelUrlStates(TaskState taskState, Map<WebmasterHostId, Map<String, RecrawlResult>> latestRecrawlResults) {
        DateTime now = DateTime.now();
        DateTime staleBefore = now.minus(DelUrlRequest.STALE_REQUEST_AGE);
        DateTime resendIfOlderThan = now.minus(RESEND_DEL_URL_AFTER);
        List<DelUrlRequest> changedRequests = new ArrayList<>();

        delurlRequestsService.list(staleBefore, now).forEach(request -> {
            if (abtService.isInExperiment(request.getHostId(), "DELURL_SAMOVAR")) {
                return;
            }
            if (request.getState() != DelurlState.NEW && request.getState() != DelurlState.IN_PROGRESS) {
                return;
            }

            taskState.waitingUrls++;
            Map<String, RecrawlResult> hostResults =
                    latestRecrawlResults.getOrDefault(request.getHostId(), Collections.emptyMap());
            RecrawlResult recrawlResult = hostResults.get(request.getRelativeUrl());
            DelurlState newState = computeNewState(request, recrawlResult, resendIfOlderThan);
            if (newState == null) {
                // nothing to change for this request yet
                return;
            }

            switch (newState) {
                case ACCEPTED:
                    taskState.delurlProcessed++;
                    changedRequests.add(request.withState(newState));
                    break;
                case ERROR:
                    taskState.delurlError++;
                    changedRequests.add(
                            request.withState(newState).
                                    withErrorMessage("Result status is not corrected for DelUrl or result is not success")
                    );
                    break;
                default:
                    // any other state is stored as is, without touching the counters
                    changedRequests.add(request.withState(newState));
                    break;
            }
        });

        log.info("Save processed delUrls to Ydb: {}", changedRequests.size());
        delurlRequestsService.updateBatch(changedRequests);
    }


    /**
     * Tells whether a recrawl result confirms a DelUrl request.
     *
     * @param result recrawl result to inspect
     * @return {@code true} when the result is successful and its code is one of
     * {@code DEL_URL_CORRECT_STATUSES}
     */
    static boolean checkDelUrlCode(RecrawlResult result) {
        if (!result.isSuccess()) {
            return false;
        }
        return DEL_URL_CORRECT_STATUSES.contains(result.getCode());
    }

    /**
     * Computes the next state of a DelUrl request.
     * <ul>
     *     <li>No result: a {@code NEW} request added before {@code resendIfOlderThan} keeps its current
     *     state (and is therefore returned); otherwise there is no change.</li>
     *     <li>Result processed after the request was added: {@code ACCEPTED} when
     *     {@link #checkDelUrlCode(RecrawlResult)} passes, {@code ERROR} otherwise.</li>
     *     <li>Result not newer than the request: no change.</li>
     * </ul>
     *
     * @param url               request to evaluate
     * @param result            latest recrawl result for the request URL, may be {@code null}
     * @param resendIfOlderThan threshold for the add date of {@code NEW} requests without a result
     * @return the new state, or {@code null} when the request should be left untouched
     */
    static DelurlState computeNewState(DelUrlRequest url, RecrawlResult result, DateTime resendIfOlderThan) {
        if (result == null) {
            boolean oldNewRequest =
                    (url.getState() == DelurlState.NEW) && url.getAddDate().isBefore(resendIfOlderThan);
            return oldNewRequest ? url.getState() : null;
        }
        if (!url.getAddDate().isBefore(result.getProcessingTime())) {
            // the result predates the request, so it says nothing about it
            return null;
        }
        if (checkDelUrlCode(result)) {
            return DelurlState.ACCEPTED;
        }
        return DelurlState.ERROR;
    }

    /**
     * Computes the next state of a recrawl request.
     * <ul>
     *     <li>No result (not processed or failed): {@code STALE} when the request was added before
     *     {@code urlStaleIfOlderThan}; otherwise {@code NEW} when it is still {@code NEW} and was added
     *     before {@code resendIfOlderThan}.</li>
     *     <li>Result processed after the request was added: {@code PROCESSED} on success,
     *     {@code STALE} otherwise.</li>
     * </ul>
     *
     * @param url                 request to evaluate
     * @param result              latest recrawl result for the request URL, may be {@code null}
     * @param resendIfOlderThan   threshold for the add date of {@code NEW} requests without a result
     * @param urlStaleIfOlderThan threshold before which a request without a result is considered stale
     * @return the new state, or {@code null} when the request should be left untouched
     */
    static RecrawlState computeNewState(UrlForRecrawl url, RecrawlResult result,
                                        DateTime resendIfOlderThan, DateTime urlStaleIfOlderThan) {
        if (result != null) {
            if (!url.getAddDate().isBefore(result.getProcessingTime())) {
                // the result predates the request
                return null;
            }
            return result.isSuccess() ? RecrawlState.PROCESSED : RecrawlState.STALE;
        }

        // no result: either not processed yet or failed
        if (url.getAddDate().isBefore(urlStaleIfOlderThan)) {
            return RecrawlState.STALE;
        }
        if (url.getState() == RecrawlState.NEW && url.getAddDate().isBefore(resendIfOlderThan)) {
            return RecrawlState.NEW;
        }
        return null;
    }

    /**
     * @return the type of this periodic task, {@link PeriodicTaskType#URL_STATE_UPDATE}
     */
    @Override
    public PeriodicTaskType getType() {
        return PeriodicTaskType.URL_STATE_UPDATE;
    }

    /**
     * @return the cron-based schedule of this task
     */
    @Override
    public TaskSchedule getSchedule() {
        // NOTE(review): six-field cron expression, presumably "at the start of every hour" —
        // confirm against the cron format TaskSchedule expects
        return TaskSchedule.startByCron("0 0 * * * *");
    }

    /**
     * @param resultsDir directory that is listed for result tables and used as the transaction root
     */
    public void setResultsDir(YtPath resultsDir) {
        this.resultsDir = resultsDir;
    }

    /**
     * @param mergedResultsTable table into which old result tables are merged
     */
    public void setMergedResultsTable(YtPath mergedResultsTable) {
        this.mergedResultsTable = mergedResultsTable;
    }

    /**
     * @param mergedHookResultsTable table into which old hook result tables are merged
     */
    public void setMergedHookResultsTable(YtPath mergedHookResultsTable) {
        this.mergedHookResultsTable = mergedHookResultsTable;
    }

    /**
     * @param removeResultTables {@code true} to remove source tables after merging,
     *                           {@code false} to move them to the removed-tables directory
     */
    public void setRemoveResultTables(boolean removeResultTables) {
        this.removeResultTables = removeResultTables;
    }

    /**
     * @param removedResultTablesDir directory merged source tables are moved to when they are not removed
     */
    public void setRemovedResultTablesDir(YtPath removedResultTablesDir) {
        this.removedResultTablesDir = removedResultTablesDir;
    }

    /**
     * Counters describing a single run of the task.
     */
    @Data
    public static class TaskState implements PeriodicTaskState {
        // number of recrawl result rows loaded during the run
        public long newResults;
        // number of examined DelUrl requests in NEW or IN_PROGRESS state
        public long waitingUrls;
        // not modified anywhere in this class
        public long urlsSentToRecrawl;
        // number of DelUrl requests moved to ACCEPTED
        public long delurlProcessed;
        // number of DelUrl requests moved to ERROR
        public  long delurlError;
    }
}
