package ru.yandex.webmaster3.worker.robotstxt;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Range;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.tuple.Pair;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import ru.yandex.webmaster3.core.data.WebmasterHostId;
import ru.yandex.webmaster3.core.util.IdUtils;
import ru.yandex.webmaster3.core.util.RetryUtils;
import ru.yandex.webmaster3.core.util.concurrent.graph.BlockingBatchConsumer;
import ru.yandex.webmaster3.core.util.concurrent.graph.GraphExecution;
import ru.yandex.webmaster3.core.util.concurrent.graph.GraphExecutionBuilder;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskState;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.core.worker.task.TaskResult;
import ru.yandex.webmaster3.storage.host.AllVerifiedHostsCacheService;
import ru.yandex.webmaster3.storage.robotstxt.RobotTxtInfo;
import ru.yandex.webmaster3.storage.robotstxt.RobotsTxtService;
import ru.yandex.webmaster3.storage.settings.SettingsService;
import ru.yandex.webmaster3.storage.user.service.UserHostsService;
import ru.yandex.webmaster3.storage.util.yt.AsyncTableReader;
import ru.yandex.webmaster3.storage.util.yt.YtCypressService;
import ru.yandex.webmaster3.storage.util.yt.YtException;
import ru.yandex.webmaster3.storage.util.yt.YtPath;
import ru.yandex.webmaster3.storage.util.yt.YtService;
import ru.yandex.webmaster3.storage.util.yt.YtTableReadDriver;
import ru.yandex.webmaster3.worker.PeriodicTask;
import ru.yandex.webmaster3.worker.TaskSchedule;

import static ru.yandex.webmaster3.core.worker.task.PeriodicTaskType.IMPORT_ROBOTS_TXT;
import static ru.yandex.webmaster3.storage.host.CommonDataType.ROBOTS_TXT_LAST_TABLE;

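/**
 * Periodic task that imports robots.txt updates from dated YT tables into {@link RobotsTxtService}
 * and then asks it to send notifications for hosts whose imported rows are newer than the
 * notification threshold.
 *
 * <p>Ticket: WMC-6417
 *
 * @author akhazhoyan 11/2018
 */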
@Slf4j
@Service
@RequiredArgsConstructor(onConstructor_ = @Autowired)
public final class ImportRobotsTxtTask extends PeriodicTask<ImportRobotsTxtTask.State> {
    /**
     * Retry policy shared by row processing and notification sending.
     * NOTE(review): the arguments presumably mean 3 attempts with a 10-second step — confirm against
     * {@code RetryUtils.linearBackoff}.
     */
    private static final RetryUtils.RetryPolicy LINEAR_BACKOFF =
            RetryUtils.linearBackoff(3, Duration.standardSeconds(10));

    /** Used by {@code processRows} to keep only rows whose host passes {@code contains(hostId)}. */
    private final AllVerifiedHostsCacheService allVerifiedHostsCacheService;
    /** Receives the imported rows ({@code addRobotsTxt}) and the notification list ({@code sendRobotsTxtNotification}). */
    private final RobotsTxtService robotsTxtService;
    /** Source and sink of the {@code ROBOTS_TXT_LAST_TABLE} setting (name of the last imported table). */
    private final SettingsService settingsService;
    /** Used by {@code processRows} to list the users of a host via {@code listUsersVerifiedHost}. */
    private final UserHostsService userHostsService;
    /** YT node whose children are listed by {@code findTableToProcess}; injected from the property below. */
    @Value("${external.yt.service.hahn.root.production}/kwyt/robots/updates")
    private final YtPath ytPath;
    /** YT client used by {@code run} to open a transaction on {@code ytPath}. */
    private final YtService ytService;

    /**
     * Imports the next unprocessed robots.txt updates table.
     *
     * <p>Inside the callback passed to {@code ytService.inTransaction(ytPath).execute(...)} the method:
     * <ol>
     *   <li>selects the table via {@code initStateAndCheck}; if none is found the callback returns
     *       {@code false} and nothing is imported;</li>
     *   <li>reads the table with an {@link AsyncTableReader} backed by a single-thread executor;</li>
     *   <li>feeds every row into a "writer" graph node that calls {@code processRows} through
     *       {@code RetryUtils.execute(LINEAR_BACKOFF, ...)}; a batch that still fails is logged and its
     *       size is added to {@code rowsFailed} instead of aborting the import;</li>
     *   <li>finalizes the run via {@code processState}.</li>
     * </ol>
     * After the callback, the counters and the processed table are published through {@code setState}.
     *
     * @param runId identifier of this run; not used by the method body
     * @return a result carrying {@link TaskResult#SUCCESS}
     * @throws Exception whatever the transaction callback or {@code setState} propagates; reading
     *                   failures ({@link IOException}, {@link ExecutionException}) are rethrown as
     *                   {@link YtException}
     */
    @Override
    public Result run(UUID runId) throws Exception {
        log.info("Started importing robots txt table");
        LocalState localState = new LocalState();
        ytService.inTransaction(ytPath).execute(cypressService -> {
            boolean shouldContinue = initStateAndCheck(cypressService, localState);
            if (!shouldContinue) {
                return false;
            }

            ExecutorService ytThreadPool = Executors.newFixedThreadPool(1);
            // The pool is shut down in the finally below even when building the reader or the graph fails.
            try {
                AsyncTableReader<YtRow> tableReader = new AsyncTableReader<>(
                        cypressService,
                        localState.tableToProcess,
                        Range.all(),
                        YtTableReadDriver.createYSONDriver(YtRow.class)
                ).splitInParts(50_000L)
                        .inExecutor(ytThreadPool, "import-robots-txt-cacher")
                        .withRetry(5);

                GraphExecutionBuilder gb = GraphExecutionBuilder.newBuilder("robots-history");
                GraphExecutionBuilder.Queue<YtRow> node = gb.process(
                        () -> (BlockingBatchConsumer<YtRow>) batch -> {
                            try {
                                RetryUtils.execute(LINEAR_BACKOFF, () -> processRows(localState, batch));
                            } catch (Exception e) {
                                if (e instanceof InterruptedException) {
                                    // Catching Exception also catches interruption; restore the flag
                                    // so the worker thread can still observe it.
                                    Thread.currentThread().interrupt();
                                }
                                log.error("Failed to add robots.txt", e);
                                localState.rowsFailed.addAndGet(batch.size());
                            }
                        }
                )
                        .concurrency(8)
                        .name("writer")
                        .forceFullBatch()
                        .getInput();
                try (AsyncTableReader.TableIterator<YtRow> it = tableReader.read();
                     GraphExecution<YtRow> graph = gb.build(node)) {
                    graph.start();
                    while (it.hasNext()) {
                        graph.put(it.next());
                    }
                    graph.doneWritingAndAwaitTermination();
                } catch (IOException | ExecutionException e) {
                    throw new YtException("Unable to read table: " + localState.tableToProcess, e);
                }
            } finally {
                ytThreadPool.shutdown();
            }
            log.info("State: rowsFailed={}, rowsTotal={}, hostToUsers.size={}",
                    localState.rowsFailed, localState.rowsTotal, localState.hostToUsers.size());
            processState(localState);

            return true;
        });
        log.info("Finished importing robots txt table");
        setState(new State(localState.rowsTotal.get(), localState.rowsFailed.get(), localState.tableToProcess));
        return new Result(TaskResult.SUCCESS);
    }

    /**
     * Chooses the table for this run and fills the table-dependent fields of {@code localState}.
     *
     * <p>The uncached {@code ROBOTS_TXT_LAST_TABLE} setting is parsed as a date and handed to
     * {@code findTableToProcess}, which looks for a table dated after it.
     *
     * @param cypressService YT client passed on to {@code findTableToProcess}
     * @param localState     per-run state; on success receives {@code tableToProcess},
     *                       {@code tableName} and {@code notificationThresholdMillis}
     * @return {@code true} if a table was found and the state was initialized, {@code false} otherwise
     */
    @VisibleForTesting
    boolean initStateAndCheck(YtCypressService cypressService, LocalState localState) {
        String lastImportedSetting = settingsService.getSettingUncached(ROBOTS_TXT_LAST_TABLE).getValue();
        DateTime lastImportedDate = DateTime.parse(lastImportedSetting);

        Optional<YtPath> candidate = findTableToProcess(lastImportedDate, cypressService);
        if (candidate.isPresent()) {
            YtPath table = candidate.get();
            localState.tableToProcess = table;
            localState.tableName = table.getName();

            // If a host was added recently, the YT table contains every robots.txt of that host known
            // to us, i.e. not only those downloaded on the day `DateTime.parse(tableName)` but also
            // those from several days before. We do not want to send emails about those robots.txt,
            // so rows are compared against this threshold.
            DateTime tableDate = DateTime.parse(localState.tableName);
            localState.notificationThresholdMillis = tableDate.minusDays(1).getMillis();
            log.info("Notification threshold millis: {}", localState.notificationThresholdMillis);

            return true;
        }

        log.info("Did not find table to process. Last processed table: {}. Finishing task",
                lastImportedDate);
        return false;
    }

    /**
     * Stores one batch of YT rows and records which hosts need a notification.
     *
     * <p>Rows whose host is not contained in {@code allVerifiedHostsCacheService} are dropped. The
     * remaining rows are converted to {@link RobotTxtInfo} and passed to
     * {@code robotsTxtService.addRobotsTxt}. For every stored row whose {@code dateAdded} is after
     * {@code localState.notificationThresholdMillis}, the host's users are loaded into
     * {@code localState.hostToUsers} unless the host already has an entry there.
     *
     * <p>{@code run} invokes this method through {@code RetryUtils.execute}, so it may run several
     * times for the same batch. {@code rowsTotal} is therefore increased only after the whole batch
     * has been processed without an exception; otherwise each failed attempt would count the same
     * rows again. As a consequence {@code rowsTotal} covers rows of successfully processed batches.
     *
     * @param localState per-run state; {@code rowsTotal} and {@code hostToUsers} are updated
     * @param rows       batch of rows read from the YT table
     */
    @VisibleForTesting
    void processRows(LocalState localState, Collection<YtRow> rows) {
        List<RobotTxtInfo> verifiedRows = rows.stream()
                .filter(row -> allVerifiedHostsCacheService.contains(row.hostId))
                .map(YtRow::toRobotTxtInfo)
                .collect(Collectors.toList());

        robotsTxtService.addRobotsTxt(verifiedRows);
        for (RobotTxtInfo row : verifiedRows) {
            if (row.getDateAdded().getMillis() > localState.notificationThresholdMillis) {
                localState.hostToUsers.computeIfAbsent(row.getHostId(),
                        hostId -> new ArrayList<>(userHostsService.listUsersVerifiedHost(hostId).keySet()));
            }
        }

        // Counted last so that a retried attempt does not add the same batch twice.
        localState.rowsTotal.addAndGet(verifiedRows.size());
    }

    /**
     * Finalizes a run: validates the failure ratio, sends notifications and stores the table name.
     *
     * <p>The (host, user) pairs collected in {@code localState.hostToUsers} are passed to
     * {@code robotsTxtService.sendRobotsTxtNotification} under {@code LINEAR_BACKOFF}; afterwards the
     * {@code ROBOTS_TXT_LAST_TABLE} setting is updated with {@code localState.tableName}.
     *
     * @param localState per-run state accumulated while reading the table
     * @throws IllegalStateException if {@code rowsFailed / rowsTotal} exceeds 0.1
     * @throws InterruptedException  declared for the retried notification call
     */
    @VisibleForTesting
    void processState(LocalState localState) throws InterruptedException {
        // With zero failed and zero total rows the ratio is NaN, and NaN > 0.1 is false.
        double failureRatio = localState.rowsFailed.get() / (double) localState.rowsTotal.get();
        if (failureRatio > 0.1) {
            log.error("Too many failures: {} out of {}", localState.rowsFailed, localState.rowsTotal);
            throw new IllegalStateException("Too many failures");
        }

        // Flatten host -> users into one (host, user) pair per recipient.
        List<Pair<WebmasterHostId, Long>> items = new ArrayList<>();
        for (Map.Entry<WebmasterHostId, List<Long>> hostEntry : localState.hostToUsers.entrySet()) {
            WebmasterHostId hostId = hostEntry.getKey();
            for (Long userId : hostEntry.getValue()) {
                items.add(Pair.of(hostId, userId));
            }
        }

        RetryUtils.execute(
                LINEAR_BACKOFF,
                () -> robotsTxtService.sendRobotsTxtNotification(items, localState.tableName)
        );
        settingsService.update(ROBOTS_TXT_LAST_TABLE, localState.tableName);
    }

    /**
     * Mutable state of a single task run, shared between {@code run}, {@code initStateAndCheck},
     * {@code processRows} and {@code processState}.
     */
    @VisibleForTesting
    static class LocalState {
        // --- table selected for this run (set by initStateAndCheck) ---
        YtPath tableToProcess;
        String tableName;
        /** Rows with {@code dateAdded} at or before this instant do not trigger a users lookup. */
        long notificationThresholdMillis;

        // --- counters updated while batches are processed ---
        /** Increased in {@code processRows} by the number of rows on verified hosts. */
        AtomicLong rowsTotal = new AtomicLong();
        /** Increased in {@code run} by the batch size when processing of a batch fails. */
        AtomicLong rowsFailed = new AtomicLong();

        // --- notification data ---
        /** Users per host, filled in {@code processRows} and flattened in {@code processState}. */
        Map<WebmasterHostId, List<Long>> hostToUsers = new ConcurrentHashMap<>();
        /** NOTE(review): not referenced anywhere else in this file — possibly used by tests only. */
        List<Pair<WebmasterHostId, Long>> sentNotification = new ArrayList<>();
    }

    /**
     * One row of the YT robots.txt updates table, built from the {@code Host}, {@code LastAccess}
     * and {@code Content} properties.
     */
    @VisibleForTesting
    @Getter
    static final class YtRow {
        final WebmasterHostId hostId;
        final long timestampSeconds;
        final String content;

        /**
         * @param host       host URL, converted with {@code IdUtils.urlToHostId}
         * @param lastAccess timestamp in seconds; must not be {@code null}
         * @param content    robots.txt body; {@code null} is stored as an empty string
         */
        public YtRow(
                @JsonProperty("Host") String host,
                @JsonProperty("LastAccess") Long lastAccess,
                @JsonProperty("Content") String content) {
            WebmasterHostId parsedHostId = IdUtils.urlToHostId(host);
            long seconds = Preconditions.checkNotNull(lastAccess);
            String nonNullContent = Strings.nullToEmpty(content);

            this.hostId = parsedHostId;
            this.timestampSeconds = seconds;
            this.content = nonNullContent;
        }

        /** @return {@code timestampSeconds} converted to milliseconds */
        public long getTimestampMilliseconds() {
            return 1000 * timestampSeconds;
        }

        /** @return this row as a {@link RobotTxtInfo} with a millisecond timestamp */
        public RobotTxtInfo toRobotTxtInfo() {
            long timestampMillis = getTimestampMilliseconds();
            return new RobotTxtInfo(hostId, timestampMillis, content);
        }
    }

    /**
     * Finds the earliest table under {@code ytPath} that is dated after the last imported one.
     *
     * <p>Only children whose name matches {@code yyyy-MM-dd} (four, two and two digits) are
     * considered; names are compared as strings to pick the smallest.
     *
     * @param tableImportedLastTime date of the last imported table
     * @param cypressService        YT client used to list the children of {@code ytPath}
     * @return path of the table to import, or empty if no newer table exists
     * @throws YtException declared for the listing call
     */
    private Optional<YtPath> findTableToProcess(DateTime tableImportedLastTime,
                                                YtCypressService cypressService) throws YtException {
        Optional<String> earliestNewName = cypressService.list(ytPath)
                .stream()
                .map(child -> child.getName())
                .filter(name -> name.matches("\\d{4}-\\d{2}-\\d{2}")
                        && new DateTime(name).isAfter(tableImportedLastTime))
                .min(Comparator.naturalOrder());
        return earliestNewName.map(name -> YtPath.path(ytPath, name));
    }

    /**
     * State published by {@code run} via {@code setState} after each run.
     * Field order matters: {@code run} calls the constructor as
     * {@code new State(rowsTotal, rowsFailed, processedTable)}.
     */
    @lombok.Value
    public static class State implements PeriodicTaskState {
        /** Value of {@code LocalState.rowsTotal} at the end of the run. */
        long rowsTotal;
        /** Value of {@code LocalState.rowsFailed} at the end of the run. */
        long rowsFailed;
        /** Table chosen for the run; stays {@code null} when no table was found. */
        YtPath processedTable;
    }

    /** @return the periodic task type of this task, {@code IMPORT_ROBOTS_TXT} */
    @Override
    public PeriodicTaskType getType() {
        return IMPORT_ROBOTS_TXT;
    }

    /**
     * @return a cron-based schedule for this task.
     *         NOTE(review): the six-field expression presumably means "every 5 minutes at second 0" —
     *         confirm against the cron dialect used by {@code TaskSchedule.startByCron}.
     */
    @Override
    public TaskSchedule getSchedule() {
        final String cronExpression = "0 */5 * * * *";
        return TaskSchedule.startByCron(cronExpression);
    }

}
