package ru.yandex.webmaster3.storage.delurl;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.function.Predicate;

import com.datastax.driver.core.utils.UUIDs;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpStatus;
import org.jetbrains.annotations.NotNull;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import ru.yandex.common.util.concurrent.Executors;
import ru.yandex.webmaster3.core.addurl.AddUrlService;
import ru.yandex.webmaster3.core.addurl.RecrawlServiceException;
import ru.yandex.webmaster3.core.data.WebmasterHostId;
import ru.yandex.webmaster3.core.delurl.DelUrlRequest;
import ru.yandex.webmaster3.core.delurl.DelurlState;
import ru.yandex.webmaster3.core.delurl.DelurlType;
import ru.yandex.webmaster3.core.robotstxt.AllowPrefixInfo;
import ru.yandex.webmaster3.core.robotstxt.RobotsTxtUtils;
import ru.yandex.webmaster3.core.util.HtmlUtils;
import ru.yandex.webmaster3.core.util.IdUtils;
import ru.yandex.webmaster3.core.util.ZoraResponseDocumentUtil;
import ru.yandex.webmaster3.core.zora.GoZoraService;
import ru.yandex.webmaster3.core.zora.OfflineZoraService;
import ru.yandex.webmaster3.core.zora.ZoraConversionUtil;
import ru.yandex.webmaster3.core.zora.data.request.ZoraPDFetchRequest;
import ru.yandex.webmaster3.core.zora.data.response.ZoraPDFetchStatus;
import ru.yandex.webmaster3.core.zora.data.response.ZoraUrlFetchResponse;
import ru.yandex.webmaster3.core.zora.go_data.request.GoZoraRequest;
import ru.yandex.webmaster3.storage.delurl.dao.DelUrlQueueYDao;
import ru.yandex.webmaster3.storage.util.ydb.exception.WebmasterYdbException;
import ru.yandex.wmtools.common.error.InternalException;
import ru.yandex.wmtools.common.sita.SitaException;
import ru.yandex.wmtools.common.util.http.YandexHttpStatus;

import static ru.yandex.webmaster3.core.zora.ZoraSourceEnum.webmaster_robot;
import static ru.yandex.webmaster3.core.zora.ZoraSourceEnum.webmaster_robot_batch;
import static ru.yandex.wmtools.common.util.http.YandexHttpStatus.HTTP_1003_ROBOTS_TXT_DISALLOW;

/**
 * Handles "delurl" requests: requests to remove a single URL or a URL prefix.
 *
 * <p>For a single URL the page is fetched (first through {@link GoZoraService}, then, if that did not
 * give a reason to delete, through {@link OfflineZoraService}) and the request moves to
 * {@link DelurlState#IN_PROGRESS} when the page is disallowed in robots.txt, answers 403/404/410 or
 * carries a {@code noindex} meta tag. Requests that reach {@code IN_PROGRESS} are forwarded to
 * {@link AddUrlService#requestDeleteSamovar}.
 *
 * <p>For a prefix the host's robots.txt is downloaded and the prefix is queued via
 * {@link DelUrlQueueYDao} only when robots.txt does not allow it.
 *
 * @author aherman
 */
@Slf4j
@Service
@RequiredArgsConstructor(onConstructor_ = @Autowired)
public class DelUrlService {
    private static final String ROBOTS_TXT_PATH = "/robots.txt";

    /**
     * Matches a meta tag named {@code robots} or {@code yandex} whose content, split on commas and
     * spaces, contains {@code noindex} or {@code none}. Matching is case-insensitive; lowercasing uses
     * {@link Locale#ROOT} so the result does not depend on the JVM default locale (e.g. the Turkish
     * dotless "i" would otherwise break the {@code noindex} comparison).
     */
    @NotNull
    static final Predicate<HtmlUtils.MetaTag> NOINDEX_METATAG_PREDICATE = mt -> {
        String name = StringUtils.defaultString(mt.getName()).toLowerCase(Locale.ROOT);
        String value = StringUtils.defaultString(mt.getContent()).toLowerCase(Locale.ROOT);

        if (!"robots".equals(name) && !"yandex".equals(name)) {
            return false;
        }
        // ", " is a set of separator characters here: the content is split on commas and on spaces.
        for (String directive : StringUtils.split(value, ", ")) {
            if ("noindex".equals(directive) || "none".equals(directive)) {
                return true;
            }
        }
        return false;
    };

    private final ThreadFactory threadFactory = new ThreadFactoryBuilder()
            .setDaemon(true)
            .setNameFormat("delurl-zora-%d")
            .build();

    /** Pool that runs the tasks submitted by {@link #processUrlAsync(DelUrlRequest)}. */
    private final ExecutorService executor = Executors.newBlockingFixedThreadPool(10, 200, 60L, TimeUnit.SECONDS,
            new ArrayBlockingQueue<>(10000), threadFactory);

    private static final Duration REALTIME_TIMEOUT = Duration.standardSeconds(40);
    private static final Duration BATCH_TIMEOUT = Duration.standardSeconds(60);

    // NOTE: the order of the fields below defines the parameter order of the Lombok-generated constructor.
    private final AddUrlService addUrlService;
    private final DelUrlQueueYDao delurlQueueYDao;
    private final OfflineZoraService offlineZoraService;
    private final GoZoraService goZoraService;


    /**
     * Derives the next state of a request from a fetch response.
     *
     * <ul>
     *   <li>disallowed in robots.txt: {@code IN_PROGRESS} with {@code allowedInRobotsTxt = false};</li>
     *   <li>no server HTTP status: {@code REJECTED};</li>
     *   <li>status 403, 404 or 410: {@code IN_PROGRESS};</li>
     *   <li>document contains a noindex meta tag: {@code IN_PROGRESS} with {@code noindex = true};</li>
     *   <li>anything else: {@code REJECTED}.</li>
     * </ul>
     *
     * @param delurlRequest    request being checked
     * @param urlFetchResponse fetch result for the request's URL
     * @return a copy of the request with the updated state (and HTTP code, when one is available)
     * @throws IOException       declared by the document extraction helpers
     * @throws InternalException declared by the document extraction helpers
     */
    private DelUrlRequest updateStatus(DelUrlRequest delurlRequest, ZoraUrlFetchResponse urlFetchResponse)
            throws IOException, InternalException {
        Boolean allowedInRobotsTxt = urlFetchResponse.isAllowedInRobotsTxt();
        // A null value means "unknown" and is treated the same as "allowed".
        if (allowedInRobotsTxt != null && !allowedInRobotsTxt) {
            return delurlRequest
                    .withAllowedInRobotsTxt(false)
                    .withState(DelurlState.IN_PROGRESS);
        }

        Integer serverHttpStatusCode = urlFetchResponse.getServerHttpStatusCode();
        if (serverHttpStatusCode == null) {
            return delurlRequest.withState(DelurlState.REJECTED)
                    .withErrorMessage("Allowed in robots.txt and status code is null");
        }

        DelUrlRequest requestWithCode = delurlRequest.withHttpCode(serverHttpStatusCode);
        int statusCode = serverHttpStatusCode;
        if (statusCode == HttpStatus.SC_NOT_FOUND
                || statusCode == HttpStatus.SC_FORBIDDEN
                || statusCode == HttpStatus.SC_GONE) {
            return requestWithCode.withState(DelurlState.IN_PROGRESS);
        }

        List<HtmlUtils.MetaTag> metaTags =
                HtmlUtils.extractMetaTags(ZoraResponseDocumentUtil.getResponseString(urlFetchResponse));
        if (metaTags.stream().anyMatch(NOINDEX_METATAG_PREDICATE)) {
            return requestWithCode
                    .withNoindex(true)
                    .withState(DelurlState.IN_PROGRESS);
        }

        return requestWithCode.withState(DelurlState.REJECTED)
                .withErrorMessage("Allowed in robots.txt and status code is %s and there is no noindex meta tags"
                        .formatted(serverHttpStatusCode));
    }

    /**
     * Creates a new single-URL removal request and processes it.
     *
     * @param hostId            host the URL belongs to
     * @param relativeUrl       URL relative to the host
     * @param userIp            currently unused
     * @param userYandexUid     currently unused
     * @param userId            id of the requesting user
     * @param balancerRequestId request id stored with the created request
     * @param isRealtime        whether the fetch should use the realtime source, timeout and priority
     * @return the processed request
     * @throws RecrawlServiceException on a recoverable error while sending the URL for deletion
     */
    public DelUrlRequest processUrl(WebmasterHostId hostId, String relativeUrl, String userIp, String userYandexUid,
                                    long userId, String balancerRequestId, boolean isRealtime) throws RecrawlServiceException {
        DateTime now = DateTime.now();
        DelUrlRequest delurlRequest = new DelUrlRequest(UUIDs.timeBased(), hostId, relativeUrl,
                DelurlState.NEW, DelurlType.URL, now, now, true,
                0, false, userId, balancerRequestId, 0, null);
        // Honour the caller's choice instead of always forcing the realtime mode.
        return processUrlInternal(delurlRequest, isRealtime);
    }

    /**
     * Processes an existing request according to its type, in non-realtime mode for URLs.
     *
     * @param delurlRequest request to process
     * @return the processed request
     * @throws RecrawlServiceException on a recoverable error while sending the URL for deletion
     * @throws IllegalStateException   if the request type is neither {@code URL} nor {@code PREFIX}
     */
    public DelUrlRequest processUrl(DelUrlRequest delurlRequest) throws RecrawlServiceException {
        switch (delurlRequest.getType()) {
            case URL:
                return processUrlInternal(delurlRequest);
            case PREFIX:
                return processPrefixInternal(delurlRequest);
            default:
                throw new IllegalStateException("Unsupported delurl type: " + delurlRequest.getType());
        }
    }

    /**
     * Submits a single-URL request for processing on the internal executor.
     *
     * @param delurlRequest request of type {@code URL}
     * @return future holding the processed request
     * @throws IllegalStateException if the request is not of type {@code URL}
     */
    public Future<DelUrlRequest> processUrlAsync(DelUrlRequest delurlRequest) {
        if (delurlRequest.getType() == DelurlType.URL) {
            return executor.submit(() -> processUrlInternal(delurlRequest));
        }
        throw new IllegalStateException(
                "Only URL requests can be processed asynchronously, got: " + delurlRequest.getType());
    }

    /** Processes a single-URL request in non-realtime (batch) mode. */
    private DelUrlRequest processUrlInternal(DelUrlRequest delurlRequest) throws RecrawlServiceException {
        return processUrlInternal(delurlRequest, false);
    }

    /**
     * Fetches the URL through GoZora and evaluates the response with {@link #updateStatus}.
     * Any failure is converted into an {@code ERROR} state instead of being propagated.
     */
    private DelUrlRequest checkWithGoZora(DelUrlRequest delurlRequest, URI uri) {
        try {
            ZoraUrlFetchResponse response =
                    ZoraConversionUtil.toUrlFetchResponse(goZoraService.executeRequestFetchResponse(GoZoraRequest.of(uri.toString())));

            return updateStatus(delurlRequest, response);
        } catch (Exception e) {
            // Best-effort check: the caller falls back to the slower fetch when this one fails.
            log.info("error ", e);
            return delurlRequest.withState(DelurlState.ERROR).withErrorMessage("error " + e);
        }
    }

    /**
     * Quick path: checks the URL with GoZora and, if the result is {@code IN_PROGRESS}, sends it for deletion.
     *
     * @return the GoZora-based result, or the original request in {@code ERROR} state when sending
     *         failed with an unrecoverable error
     * @throws RecrawlServiceException when sending failed with a recoverable error
     */
    private DelUrlRequest processUrlInternalFast(DelUrlRequest delurlRequest, String fullUrl, URI uri) throws RecrawlServiceException {
        DelUrlRequest newDelUrlRequest = checkWithGoZora(delurlRequest, uri);
        try {
            sendUrlToSamovar(newDelUrlRequest, fullUrl);
        } catch (RecrawlServiceException e) {
            if (!e.isRecoverableError()) {
                log.error("Unrecoverable error when processing url " + fullUrl, e);
                return delurlRequest.withState(DelurlState.ERROR)
                        .withErrorMessage("Unrecoverable error when processing url %s ".formatted(fullUrl) + e);
            }
            throw e;
        }
        return newDelUrlRequest;
    }

    /**
     * Processes a single-URL request: tries the quick GoZora path first and, unless that already
     * produced {@code IN_PROGRESS}, fetches the page through the offline Zora service and evaluates it.
     *
     * @param delurlRequest request to process
     * @param isRealtime    selects the Zora source, timeout and priority; batch requests are
     *                      additionally retried once on a fetch timeout
     * @return the request with its resulting state
     * @throws RecrawlServiceException on a recoverable error while sending the URL for deletion
     */
    private DelUrlRequest processUrlInternal(DelUrlRequest delurlRequest, boolean isRealtime) throws RecrawlServiceException {
        ZoraUrlFetchResponse urlFetchResponse;
        String fullUrl = delurlRequest.getFullUrl();
        try {
            URI uri = new URI(fullUrl);
            DelUrlRequest tmpRequest = processUrlInternalFast(delurlRequest, fullUrl, uri);
            if (tmpRequest.getState() == DelurlState.IN_PROGRESS) {
                return tmpRequest;
            }
            ZoraPDFetchRequest.Builder builder = ZoraPDFetchRequest.builder(uri)
                    // realtime requests get a higher priority
                    .source(isRealtime ? webmaster_robot : webmaster_robot_batch)
                    .timeout(isRealtime ? REALTIME_TIMEOUT : BATCH_TIMEOUT)
                    .priority(isRealtime ? 0L : 1L);

            var tmp = offlineZoraService.fetchUrl(builder.build()).getResponse();
            // retry once on timeout (batch mode only)
            if (!isRealtime && tmp.getPdFetch().getStatus() == ZoraPDFetchStatus.Timeout) {
                tmp = offlineZoraService.fetchUrl(builder.build()).getResponse();
            }

            urlFetchResponse = ZoraConversionUtil.toUrlFetchResponse(tmp);

        } catch (URISyntaxException | SitaException e) {
            log.error("Unable to download url with Zora in delurl, Request: " + delurlRequest, e);
            return delurlRequest.withState(DelurlState.ERROR)
                    .withErrorMessage("Unable to download url with Zora in delurl, Request: " + delurlRequest + " " + e);
        }

        YandexHttpStatus sitaHttpStatus = urlFetchResponse.getExtendedHttpStatus();
        if (YandexHttpStatus.isExtErrors(sitaHttpStatus) && sitaHttpStatus != HTTP_1003_ROBOTS_TXT_DISALLOW) {
            // The page could not be downloaded, so there is no evidence that it may be deleted.
            log.error("Unable to download url {} in delurl, but return NoReasonToDelete: {}",
                    fullUrl, sitaHttpStatus);
            return delurlRequest.withState(DelurlState.REJECTED)
                    .withErrorMessage("Unable to download url %s in delurl, but return NoReasonToDelete: %s"
                            .formatted(fullUrl, sitaHttpStatus.toString()));
        }

        try {
            delurlRequest = updateStatus(delurlRequest, urlFetchResponse);
            // if everything is fine, send the URL for recrawl right away
            sendUrlToSamovar(delurlRequest, fullUrl);
        } catch (IOException | InternalException e) {
            log.error("Error code from Zora for url {} in delurl", fullUrl, e);
            return delurlRequest.withState(DelurlState.ERROR)
                    .withErrorMessage("Error code from Zora for url %s in delurl ".formatted(fullUrl) + e);
        } catch (RecrawlServiceException e) {
            if (!e.isRecoverableError()) {
                log.error("Unrecoverable error when processing url " + fullUrl, e);
                return delurlRequest.withState(DelurlState.ERROR)
                        .withErrorMessage("Unrecoverable error when processing url %s ".formatted(fullUrl) + e);
            }
            throw e;
        }
        return delurlRequest;
    }

    /** Sends the URL for deletion, but only when the request is in {@code IN_PROGRESS} state. */
    private void sendUrlToSamovar(DelUrlRequest delurlRequest, String fullUrl) throws RecrawlServiceException {
        if (delurlRequest.getState() == DelurlState.IN_PROGRESS) {
            addUrlService.requestDeleteSamovar(fullUrl);
        }
    }

    /**
     * Processes a prefix removal request by checking the prefix against the host's robots.txt.
     *
     * @param request prefix request
     * @return {@code REJECTED} when robots.txt cannot be downloaded or allows the prefix,
     *         {@code ACCEPTED} when the prefix is disallowed and the request was queued,
     *         {@code ERROR} when the check or the queue insert failed
     */
    private DelUrlRequest processPrefixInternal(DelUrlRequest request) {
        // download robots.txt
        log.info("Checking prefix {} against robots.txt. HostId: {}", request.getRelativeUrl(), request.getHostId());
        String robotsTxtUrl = IdUtils.hostIdToUrl(request.getHostId()) + ROBOTS_TXT_PATH;

        var bodyAndHttpCode = goZoraService.executeRequest(GoZoraRequest.of(robotsTxtUrl));
        String httpBody = bodyAndHttpCode.getBody();
        int httpCode = bodyAndHttpCode.getHttpCode();
        if (httpCode != HttpStatus.SC_OK) {
            log.error("Unable to download robots.txt. HttpCode: {}", httpCode);
            return request.withAllowedInRobotsTxt(false).withState(DelurlState.REJECTED)
                    .withErrorMessage("Unable to download robots.txt. HttpCode: %d".formatted(httpCode));
        }
        try {
            AllowPrefixInfo info = RobotsTxtUtils.isPrefixAllowed(request.getRelativeUrl(), httpBody);
            log.info("Check finished: {}", info);
            if (info.isAllow()) {
                DelUrlRequest tmp = request.withAllowedInRobotsTxt(true).withState(DelurlState.REJECTED)
                        .withErrorMessage("Prefix is allowed");
                log.info(tmp.getErrorMessage());
                return tmp;
            }
            // The request is queued as received; the ACCEPTED state is set only on the returned copy.
            delurlQueueYDao.insert(request);
            return request.withAllowedInRobotsTxt(false).withState(DelurlState.ACCEPTED);
        } catch (IOException e) {
            log.error("Error when checking prefix", e);
            return request.withState(DelurlState.ERROR)
                    .withErrorMessage("Error when checking prefix " + e);
        } catch (WebmasterYdbException e) {
            log.error("YDB error", e);
            return request.withState(DelurlState.ERROR)
                    .withErrorMessage("YDB error " + e);
        }
    }
}
