package ru.yandex.webmaster3.worker.addurl;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.jetbrains.annotations.Nullable;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import ru.yandex.webmaster3.core.WebmasterException;
import ru.yandex.webmaster3.core.addurl.RecrawlState;
import ru.yandex.webmaster3.core.addurl.UrlForRecrawl;
import ru.yandex.webmaster3.core.codes.LinkType;
import ru.yandex.webmaster3.core.data.WebmasterHostId;
import ru.yandex.webmaster3.core.http.WebmasterErrorResponse;
import ru.yandex.webmaster3.core.logbroker.reader.IDataProcessing;
import ru.yandex.webmaster3.core.logbroker.reader.MessageContainer;
import ru.yandex.webmaster3.core.util.RetryUtils;
import ru.yandex.webmaster3.core.util.environment.YandexEnvironmentType;
import ru.yandex.webmaster3.storage.abt.AbtService;
import ru.yandex.webmaster3.storage.abt.model.Experiment;
import ru.yandex.webmaster3.storage.addurl.AddUrlRequestsService;
import ru.yandex.webmaster3.storage.host.AllVerifiedHostsCacheService;
import ru.yandex.wmtools.common.util.http.YandexHttpStatus;

import java.io.ByteArrayInputStream;
import java.util.Base64;
import java.util.List;
import java.util.zip.InflaterInputStream;


/**
 * Consumes batches of raw recrawl-result messages and updates the state of the
 * matching pending recrawl requests.
 *
 * <p>Each raw message is inflated and parsed into a {@code TWebmasterAddurlItem}.
 * Results for hosts that are not in the verified-hosts cache, redirect results and
 * unparseable messages are skipped. For the remaining results every pending request
 * for the same host and URL is moved to a new state when the result is newer than
 * the request.
 *
 * <p>NOTE(review): verbose logging is switched on only by {@link #init()}; nothing in
 * this file invokes it, so it is presumably wired as a Spring init-method — confirm.
 *
 * @author leonidrom
 */
@Slf4j
@Component("urlRecrawlResultsProcessingService")
public class UrlRecrawlResultsProcessingService implements IDataProcessing {
    // Retry policy for state updates; presumably 5 attempts with a 30 second step —
    // confirm against RetryUtils.linearBackoff.
    private static final RetryUtils.RetryPolicy RETRY_POLICY = RetryUtils.linearBackoff(5, Duration.standardSeconds(30));

    private final AddUrlRequestsService addUrlRequestsService;
    private final AllVerifiedHostsCacheService allVerifiedHostsCacheService;
    private final YandexEnvironmentType yandexEnvironmentType;
    // Gates every log statement of this class (including error logs); set in init().
    private boolean enableLogging = false;

    /**
     * @param addUrlRequestsService        storage service for pending recrawl requests
     * @param allVerifiedHostsCacheService cache used to filter out results for unverified hosts
     * @param abtService                   not used by this class; kept so the constructor
     *                                     signature stays compatible with existing wiring
     * @param yandexEnvironmentType        current environment, used to decide whether to log
     */
    @Autowired
    public UrlRecrawlResultsProcessingService(
            AddUrlRequestsService addUrlRequestsService,
            AllVerifiedHostsCacheService allVerifiedHostsCacheService,
            AbtService abtService,
            @Value("${webmaster3.environmentType}") YandexEnvironmentType yandexEnvironmentType) {
        this.addUrlRequestsService = addUrlRequestsService;
        this.allVerifiedHostsCacheService = allVerifiedHostsCacheService;
        this.yandexEnvironmentType = yandexEnvironmentType;
    }

    /**
     * Enables verbose logging when running in the production environment.
     */
    public void init() {
        enableLogging = yandexEnvironmentType == YandexEnvironmentType.PRODUCTION;
    }

    /**
     * Processes every raw message of the container and commits the container afterwards.
     *
     * <p>The container is committed only if all state updates succeeded; a failed update
     * (after retries) aborts processing with an exception before the commit.
     *
     * @param messageContainer batch of raw messages to process
     * @throws WebmasterException if a request state update keeps failing after retries
     */
    @Override
    public void process(MessageContainer messageContainer) {
        if (enableLogging) {
            log.info("Total messages: {}", messageContainer.getRawMessages().size());
        }

        for (byte[] rawMessage : messageContainer.getRawMessages()) {
            RecrawlResult recrawlResult = parseRecrawlResult(rawMessage);
            if (recrawlResult == null) {
                // unparseable message, invalid URL or a redirect: nothing to apply
                continue;
            }

            WebmasterHostId hostId = recrawlResult.getHostId();
            if (!allVerifiedHostsCacheService.contains(hostId)) {
                continue;
            }

            // In practice, several requests for the same URL are a very rare case
            List<UrlForRecrawl> pendingRequests = addUrlRequestsService.getPendingRequests(
                    hostId, recrawlResult.getFullUrl());

            if (enableLogging) {
                log.info("Total request pending for {}: {}", recrawlResult.getFullUrl(), pendingRequests.size());
            }

            for (UrlForRecrawl req : pendingRequests) {
                RecrawlState newState = computeNewState(req, recrawlResult);
                if (newState != null) {
                    try {
                        RetryUtils.execute(RETRY_POLICY, () -> {
                            addUrlRequestsService.update(req.changeState(newState, DateTime.now()));
                        });
                    } catch (Exception e) {
                        throw new WebmasterException("Failed to update requests states",
                                new WebmasterErrorResponse.InternalUnknownErrorResponse(getClass(), "Failed to update requests states"), e);
                    }
                }
            }
        }

        messageContainer.commit();
    }

    /**
     * Inflates and parses a single raw message into a {@link RecrawlResult}.
     *
     * @param data compressed serialized {@code TWebmasterAddurlItem}
     * @return the parsed result, or {@code null} when the message cannot be parsed,
     *         its URL is invalid, or it describes a redirect
     */
    @Nullable
    private RecrawlResult parseRecrawlResult(byte[] data) {
        // try-with-resources: closing the InflaterInputStream releases the native memory
        // of the Inflater it created, instead of leaving that to garbage collection.
        try (var inflated = new InflaterInputStream(new ByteArrayInputStream(data))) {
            var msg = NKwYT.Queries.TWebmasterAddurlItem.parseFrom(inflated);
            if (enableLogging) {
                log.info("Got message:\n{}", msg.toString());
            }

            // url is the URL that was actually downloaded, originalUrl is what was requested.
            // originalUrl is set if it differs from url because of redirects
            String fullUrl = msg.getOriginalUrl();
            if (StringUtils.isEmpty(fullUrl)) {
                if (enableLogging) {
                    log.info("No original url");
                }
                fullUrl = msg.getUrl();
            } else {
                if (enableLogging) {
                    log.info("Original url: {}", fullUrl);
                }
            }

            if (enableLogging) {
                log.info("Url: {}", fullUrl);
            }

            Pair<WebmasterHostId, String> pair = null;
            try {
                pair = UrlForRecrawl.toHostIdAndRelativeUrl(fullUrl);
            } catch (Exception ignored) {
                // deliberately ignored: pair stays null and is handled right below
            }

            if (pair == null) {
                if (enableLogging) {
                    // log the URL that actually failed to parse; originalUrl may be empty here
                    log.error("Invalid url: {}", fullUrl);
                }
                return null;
            }

            int httpCode = (int) msg.getHttpCode();
            // negative codes should not occur, but just in case
            LinkType linkType = httpCode > 0 ? LinkType.get(httpCode) : LinkType.NOT_DOWNLOADED;
            if (linkType == LinkType.REDIRECT) {
                // Ignore redirects: the robot sends us the whole chain,
                // while we are only interested in the final result
                if (enableLogging) {
                    log.info("Ignoring redirect from {}", msg.getUrl());
                }
                return null;
            }

            // if the URL is blocked by robots.txt, last access will be 0
            long timestamp = msg.getLastAccess() * 1000L;
            if (timestamp == 0) {
                timestamp = System.currentTimeMillis();
            }

            return RecrawlResult.builder()
                    .hostId(pair.getLeft())
                    .relativeUrl(pair.getRight())
                    .fullUrl(fullUrl)
                    .processingTime(new DateTime(timestamp))
                    .code(YandexHttpStatus.parseCode(httpCode))
                    .success(linkType != LinkType.NOT_DOWNLOADED)
                    .build();
        } catch (Exception e) {
            if (enableLogging) {
                log.error("Error parsing message \n{}\n", Base64.getEncoder().encodeToString(data), e);
            }
            return null;
        }
    }

    /**
     * Decides which state a pending request should move to given a recrawl result.
     *
     * @param url    pending recrawl request
     * @param result parsed recrawl result for the same URL
     * @return {@link RecrawlState#PROCESSED} or {@link RecrawlState#STALE} (for a successful
     *         or unsuccessful result respectively) when the request was added before the
     *         result's processing time; {@code null} when the request must stay unchanged
     */
    @Nullable
    private RecrawlState computeNewState(UrlForRecrawl url, RecrawlResult result) {
        if (enableLogging) {
            log.info("Computing new state for {}", url);
        }

        RecrawlState newState = null;
        if (url.getAddDate().isBefore(result.getProcessingTime())) {
            newState = result.isSuccess() ? RecrawlState.PROCESSED : RecrawlState.STALE;
            if (enableLogging) {
                log.info("New state: {}", newState);
            }
        } else {
            // no changes for this request
            if (enableLogging) {
                log.info("New state: UNCHANGED");
            }
        }

        return newState;
    }
}
