package ru.yandex.webmaster3.worker.fresh;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Strings;
import com.google.common.collect.Range;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.joda.time.LocalDate;
import org.joda.time.LocalDateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import ru.yandex.webmaster3.core.WebmasterException;
import ru.yandex.webmaster3.core.data.WebmasterHostId;
import ru.yandex.webmaster3.core.http.WebmasterErrorResponse;
import ru.yandex.webmaster3.core.util.IdUtils;
import ru.yandex.webmaster3.core.util.RetryUtils;
import ru.yandex.webmaster3.core.util.TimeUtils;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskState;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.storage.host.CommonDataState;
import ru.yandex.webmaster3.storage.host.CommonDataType;
import ru.yandex.webmaster3.storage.searchurl.samples.dao.SearchUrlFreshStatisticsCHDao;
import ru.yandex.webmaster3.storage.searchurl.samples.dao.SearchUrlFreshUrlSampleCHDao;
import ru.yandex.webmaster3.storage.searchurl.samples.data.SearchUrlEventType;
import ru.yandex.webmaster3.storage.settings.dao.CommonDataStateYDao;
import ru.yandex.webmaster3.storage.util.clickhouse2.ClickhouseEscapeUtils;
import ru.yandex.webmaster3.storage.util.yt.*;
import ru.yandex.webmaster3.worker.PeriodicTask;
import ru.yandex.webmaster3.worker.TaskSchedule;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;

import static java.util.stream.Collectors.counting;

/**
 * Periodic task that imports fresh-URL rows from YT tables into ClickHouse.
 * <p>
 * On every run the task lists the configured YT directory, picks the smallest table name
 * (names are {@code yyyyMMdd-HHmmss} timestamps) that is later than the last imported one,
 * streams its rows, converts them to {@link SearchUrlFreshUrlSampleCHDao.Row} and inserts them
 * in batches through a small thread pool. Per-host row counts of every batch are written via
 * {@link SearchUrlFreshStatisticsCHDao}. After the table has been processed its name is stored
 * as the new import marker. At most one table is imported per run.
 * <p>
 * Insert failures are logged and counted in {@link State}; they do not prevent the import
 * marker from advancing.
 *
 * @author ishalaru
 * Created 18.03.2021
 **/
@Slf4j
@Component
@RequiredArgsConstructor(onConstructor_ = {@Autowired})
public class ImportCallistoFreshUrlPeriodicTask extends PeriodicTask<ImportCallistoFreshUrlPeriodicTask.State> {
    /** Format of table names in the source directory and of the stored import marker. */
    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormat.forPattern("yyyyMMdd-HHmmss");
    /** Maximum number of rows handed to a single insert task. */
    private static final int BATCH_SIZE = 100_000;
    /** Number of worker threads performing inserts. */
    private static final int TOTAL_THREADS = 4;
    /** Argument for {@code AsyncTableReader.splitInParts}; presumably the part size in rows - TODO confirm. */
    private static final int READ_SPLIT_SIZE = 100_000;
    /** Pause after each successfully inserted batch. */
    private static final long POST_INSERT_PAUSE_MILLIS = 5000L;
    /** Multiplier applied to YT timestamps before they are passed to Joda-Time as milliseconds. */
    private static final long MILLIS_PER_SECOND = 1000L;

    private final YtService ytService;
    private final CommonDataStateYDao commonDataStateYDao;
    private final SearchUrlFreshUrlSampleCHDao searchUrlFreshUrlSampleCHDao;
    private final SearchUrlFreshStatisticsCHDao searchUrlFreshStatisticsCHDao;
    /** YT directory that is listed for tables to import. */
    @Value("${webmaster3.worker.mercury.table.dir}")
    private YtPath path;


    /**
     * Imports the next not-yet-imported table, if there is one, and advances the import marker.
     *
     * @param runId identifier of this run (not used by the task itself)
     * @return always {@link Result#SUCCESS}; failures surface as exceptions or as counters in {@link State}
     */
    @Override
    public Result run(UUID runId) throws Exception {
        setState(new State());
        // orElseGet: parse the fallback date only when no marker has been stored yet.
        DateTime lastImportedTable = Optional.ofNullable(commonDataStateYDao.getValue(CommonDataType.LAST_CALLISTO_IMPORT))
                .map(saved -> DateTime.parse(saved.getValue(), DATE_FORMATTER))
                .orElseGet(() -> DateTime.parse("2021-01-01T00:00:00Z"));

        ytService.withoutTransaction(cypressService -> {
            YtPath tablePath = findTable(lastImportedTable, cypressService);
            if (tablePath == null) {
                log.info("No table newer than {} found in {}", lastImportedTable, path);
                return false;
            }

            log.info("Importing table {}", tablePath);
            processTable(tablePath, cypressService);
            // The marker is advanced even if some batches failed; failures are visible in State.insertErrors.
            commonDataStateYDao.update(new CommonDataState(CommonDataType.LAST_CALLISTO_IMPORT, tablePath.getName(), DateTime.now()));

            return true;
        });

        return Result.SUCCESS;
    }

    /**
     * Streams all rows of the given table, converts them and inserts them in batches of up to
     * {@link #BATCH_SIZE} rows using a dedicated thread pool, then waits for all inserts to finish.
     * Rows with a null or empty host are counted in {@code totalRows} but skipped.
     *
     * @param tablePath      table to read; its name must match {@link #DATE_FORMATTER}
     * @param cypressService YT service used for reading
     * @throws WebmasterException if reading fails with an {@link IOException} or the thread is interrupted
     */
    private void processTable(YtPath tablePath, YtCypressService cypressService) {
        var tableReader = new AsyncTableReader<>(cypressService, tablePath, Range.all(),
                        YtTableReadDriver.createYSONDriver(YtRow.class))
                        .splitInParts(READ_SPLIT_SIZE);
        var executorService = ru.yandex.common.util.concurrent.Executors.newBlockingFixedThreadPool(
                TOTAL_THREADS, TOTAL_THREADS,
                0, TimeUnit.MILLISECONDS,
                new ArrayBlockingQueue<>(TOTAL_THREADS * 2),
                Executors.defaultThreadFactory());
        List<Future<?>> processing = new ArrayList<>();
        var localDate = DateTime.parse(tablePath.getName(), DATE_FORMATTER).toLocalDate();

        try (var iterator = tableReader.read()) {
            List<SearchUrlFreshUrlSampleCHDao.Row> batch = new ArrayList<>(BATCH_SIZE);
            while (iterator.hasNext()) {
                var row = iterator.next();
                state.totalRows++;
                if (Strings.isNullOrEmpty(row.getHost())) {
                    continue;
                }

                batch.add(toSampleRow(row));
                if (batch.size() >= BATCH_SIZE) {
                    // Hand the filled list over to the worker and start a new one instead of copying it.
                    final var workList = batch;
                    processing.add(executorService.submit(() -> insertRecords(localDate, workList)));
                    batch = new ArrayList<>(BATCH_SIZE);
                }
            }

            // Flush the remainder; an empty tail would only cost useless DAO calls and a pause.
            if (!batch.isEmpty()) {
                final var tail = batch;
                processing.add(executorService.submit(() -> insertRecords(localDate, tail)));
            }

            for (var f : processing) {
                try {
                    f.get();
                } catch (ExecutionException exp) {
                    log.error("Failed to process batch", exp);
                    state.insertErrors.incrementAndGet();
                }
            }
        } catch (IOException | InterruptedException e) {
            if (e instanceof InterruptedException) {
                // Keep the interrupt visible to callers and stop the workers as soon as possible.
                Thread.currentThread().interrupt();
                executorService.shutdownNow();
            }
            throw new WebmasterException("YT error",
                    new WebmasterErrorResponse.YTServiceErrorResponse(getClass(), e), e);
        } finally {
            // A new pool is created per call, so it must always be released to avoid leaking threads.
            executorService.shutdown();
        }
    }

    /**
     * Converts a raw YT row into the ClickHouse sample row.
     * Timestamps are multiplied by 1000 before being interpreted as milliseconds in the Moscow
     * time zone (so they are presumably epoch seconds - TODO confirm); missing
     * {@code ValidFrom*LastAccess} values are replaced with {@code 0}.
     */
    private static SearchUrlFreshUrlSampleCHDao.Row toSampleRow(YtRow row) {
        var addTime = new LocalDateTime(row.getAddTime() * MILLIS_PER_SECOND, TimeUtils.EUROPE_MOSCOW_ZONE);
        var lastAccess = new LocalDateTime(row.getLastAccess() * MILLIS_PER_SECOND, TimeUtils.EUROPE_MOSCOW_ZONE);
        var insertTime = new LocalDateTime(row.getInsertTime() * MILLIS_PER_SECOND, TimeUtils.EUROPE_MOSCOW_ZONE);
        return SearchUrlFreshUrlSampleCHDao.Row.builder()
                .hostId(IdUtils.urlToHostId(row.getHost()))
                .path(row.getPath())
                .actionType(SearchUrlEventType.NEW.value())
                .addTime(addTime)
                .lastAccess(lastAccess)
                .title(ClickhouseEscapeUtils.escapeString(row.getTitle()))
                .httpCode((int) row.httpCode)
                .relCanonicalTarget(row.relCanonicalTarget)
                .insetTime(insertTime)
                .validFromMetrikaLastAccess(row.validFromMetrikaLastAccess == null ? 0L : row.validFromMetrikaLastAccess)
                .validFromIndexNowLastAccess(row.validFromIndexNowLastAccess == null ? 0L : row.validFromIndexNowLastAccess)
                .build();
    }

    /**
     * Inserts one batch of sample rows together with its per-host row counts, retrying with
     * exponential backoff, and then pauses for {@link #POST_INSERT_PAUSE_MILLIS}.
     * Failures are logged and counted in {@link State}; no exception is propagated.
     * An empty batch is a no-op.
     *
     * @param localDate date under which the per-host statistics are recorded
     * @param workList  rows to insert
     */
    public void insertRecords(LocalDate localDate, List<SearchUrlFreshUrlSampleCHDao.Row> workList) {
        if (workList.isEmpty()) {
            return;
        }

        boolean inserted = false;
        try {
            // Guard against the ClickHouse "too many parts" error.
            RetryUtils.execute(RetryUtils.expBackoff(7, Duration.millis(200)), () -> {
                searchUrlFreshUrlSampleCHDao.addRecords(workList);

                final Map<WebmasterHostId, Long> collect = workList.stream().collect(Collectors.groupingBy(SearchUrlFreshUrlSampleCHDao.Row::getHostId,
                        counting()));
                searchUrlFreshStatisticsCHDao.addRecord(localDate, collect);

            });
            inserted = true;

            // Give ClickHouse a chance to digest the inserted data.
            Thread.sleep(POST_INSERT_PAUSE_MILLIS);
        } catch (InterruptedException e) {
            // Restore the flag so the pool/owner can observe the interruption.
            Thread.currentThread().interrupt();
            if (!inserted) {
                log.error("Failed to insert data", e);
                state.insertErrors.incrementAndGet();
            }
        } catch (Exception e) {
            log.error("Failed to insert data", e);
            state.insertErrors.incrementAndGet();
        }
    }

    /**
     * Finds the next table to import: among the children of the configured directory whose names
     * parse (with {@link #DATE_FORMATTER}) to a moment after {@code lastImportTableDate}, returns
     * the one with the lexicographically smallest name.
     *
     * @param lastImportTableDate timestamp of the last imported table
     * @param cypressService      YT service used for listing
     * @return path of the table to import, or {@code null} if there is none
     * @throws IllegalArgumentException if a child name does not match the expected format
     *                                  (thrown by Joda-Time parsing)
     */
    public YtPath findTable(DateTime lastImportTableDate, YtCypressService cypressService) {
        return cypressService.list(path)
                .stream()
                .map(YtPath::getName)
                .filter(tn -> DateTime.parse(tn, DATE_FORMATTER).isAfter(lastImportTableDate))
                .min(Comparator.naturalOrder())
                .map(name -> YtPath.path(path, name))
                .orElse(null);
    }

    /** Row of the source YT table, mapped from its column names. */
    @lombok.Value
    @RequiredArgsConstructor(onConstructor_ = @JsonCreator)
    private static class YtRow {
        @JsonProperty("Host")
        String host;
        @JsonProperty("Path")
        String path;
        @JsonProperty("SearchAddTime")
        long addTime;
        @JsonProperty("Title")
        String title;
        @JsonProperty("LastAccess")
        long lastAccess;
        @JsonProperty("RelCanonicalTarget")
        String relCanonicalTarget;
        @JsonProperty("HttpCode")
        long httpCode;
        @JsonProperty("InsertTime")
        long insertTime;
        // Boxed: these two columns may be absent, in which case 0 is stored.
        @JsonProperty("ValidFromMetrikaLastAccess")
        Long validFromMetrikaLastAccess;
        @JsonProperty("ValidFromIndexNowLastAccess")
        Long validFromIndexNowLastAccess;
    }

    @Override
    public PeriodicTaskType getType() {
        return PeriodicTaskType.IMPORT_CALLISTO_FRESH_INFO;
    }

    /** Schedules the task by the cron expression {@code "0 * /10 * * * *"} (written without the space). */
    @Override
    public TaskSchedule getSchedule() {
        return TaskSchedule.startByCron("0 */10 * * * *");
    }

    /** Progress counters of a single run. */
    public static class State implements PeriodicTaskState {
        /** Number of rows read from the table, including skipped ones. */
        @Getter
        private long totalRows;
        /** Number of batches whose insert failed; updated from worker threads. */
        private final AtomicLong insertErrors = new AtomicLong();

        public long getInsertErrors() {
            return insertErrors.get();
        }
    }
}
