package ru.yandex.canvas.service.scraper;

import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpMethod;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.http.client.HttpComponentsAsyncClientHttpRequestFactory;
import org.springframework.http.converter.xml.MappingJackson2XmlHttpMessageConverter;
import org.springframework.web.client.AsyncRestTemplate;
import org.springframework.web.client.RestClientException;
import org.springframework.web.client.RestClientResponseException;
import org.springframework.web.util.UriComponentsBuilder;

import ru.yandex.canvas.model.scraper.ScraperData;

import static ru.yandex.canvas.service.TankerKeySet.ERROR;


/**
 * Client for the scraper service: sends a GET request to the scraper's {@code content} endpoint
 * with the user-supplied URL as the {@code url} query parameter and returns the deserialized
 * {@link ScraperData}.
 *
 * <p>Failures are translated into {@code ScraperBadUrlException},
 * {@code ScraperUnreachableUrlException} or {@code ScraperException}, each carrying an
 * {@code ERROR} translation key.
 */
public class ScraperService {
    private static final Logger logger = LoggerFactory.getLogger(ScraperService.class);

    private static final String CONTENT_RELATIVE_URL = "content";
    private static final String URL_IS_UNREACHABLE_MSG = "url-is-unreachable-msg";
    private static final String URL_IS_MALFORMED_MSG = "url-is-malformed-msg";
    private static final String INTERNAL_SERVICE_ERROR = "internal-server-error";
    private static final String NO_RESULT = "no-result";

    /** Upper bound, in seconds, on how long a single scraper request is awaited. */
    private static final long SCRAPER_TIMEOUT_SEC = 300;


    private final URI scraperContentUri;
    private final AsyncRestTemplate asyncRestTemplate;


    /**
     * Creates a service bound to the given scraper base URL.
     *
     * @param scraperUrl base HTTP(S) URL of the scraper; {@code content} is appended as the path
     */
    public ScraperService(String scraperUrl) {
        this.scraperContentUri =
                UriComponentsBuilder.fromHttpUrl(scraperUrl).path(CONTENT_RELATIVE_URL).build().toUri();
        HttpComponentsAsyncClientHttpRequestFactory requestFactory = new HttpComponentsAsyncClientHttpRequestFactory();
        requestFactory.setHttpAsyncClient(HttpAsyncClientBuilder.create()
                .setMaxConnPerRoute(100)
                .setMaxConnTotal(100)
                .build());
        this.asyncRestTemplate = new AsyncRestTemplate(requestFactory);
        // Drop the XML converter from the template's converter list.
        // NOTE(review): presumably so that scraper responses are negotiated/parsed as JSON - confirm.
        this.asyncRestTemplate.getMessageConverters().removeIf(
                m -> m.getClass().getName().equals(MappingJackson2XmlHttpMessageConverter.class.getName()));

    }

    /**
     * Requests scraper data for the given site and blocks until the response arrives or
     * {@link #SCRAPER_TIMEOUT_SEC} seconds elapse.
     *
     * @param userUrl URL of the site to scrape, as supplied by the user
     * @return the body of the scraper response
     * @throws ScraperBadUrlException         if {@code userUrl} cannot be normalized or turned into a request URI
     * @throws ScraperUnreachableUrlException if the request times out or the scraper answers with a 4xx status
     * @throws ScraperException               if the wait is interrupted, the scraper answers with any other
     *                                        error status, or the request fails for any other reason
     */
    public ScraperData scrapSiteInfo(String userUrl) {
        URI requestUri = buildRequestUri(userUrl);

        // Keep a handle on the in-flight request so it can be cancelled when we stop waiting for it.
        Future<ResponseEntity<ScraperData>> pending = asyncRestTemplate
                .exchange(requestUri, HttpMethod.GET, new HttpEntity<>(null, null), ScraperData.class);

        ResponseEntity<ScraperData> scraperResponse;
        try {
            scraperResponse = pending.get(SCRAPER_TIMEOUT_SEC, TimeUnit.SECONDS);
        } catch (InterruptedException e) {
            pending.cancel(true);
            // Restore the interrupt flag so code further up the stack can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new ScraperException(ERROR.key(INTERNAL_SERVICE_ERROR), "Scraper request was interrupted", e);
        } catch (TimeoutException e) {
            // Nobody will read the result any more; cancel so the pooled connection is not held needlessly.
            pending.cancel(true);
            logger.warn("Scraper request returned with timeout (url={})", userUrl);
            throw new ScraperUnreachableUrlException(ERROR.key(NO_RESULT),
                    "Scraper request returned with timeout (url=" + userUrl + ")", e);
        } catch (ExecutionException ex) {
            Throwable cause = ex.getCause();
            if (cause instanceof RestClientResponseException) {
                RestClientResponseException restEx = (RestClientResponseException) cause;
                int statusCode = restEx.getRawStatusCode();
                String errorBody = restEx.getResponseBodyAsString();
                // Compare the raw code numerically: non-standard codes have no HttpStatus constant,
                // so resolving them through the enum would throw instead of classifying the failure.
                if (isClientError(statusCode)) {
                    throw new ScraperUnreachableUrlException(ERROR.key(URL_IS_UNREACHABLE_MSG),
                            "Scraper response status code is " + statusCode +
                                    " (url=" + userUrl + "). Response: " + errorBody, ex);
                }
                throw new ScraperException(ERROR.key(INTERNAL_SERVICE_ERROR),
                        "Scraper response is " + statusCode +
                                " (url=" + userUrl + "). Response: " + errorBody, ex);
            }
            if (cause instanceof RestClientException) {
                throw new ScraperException(ERROR.key(INTERNAL_SERVICE_ERROR),
                        "Couldn't retrieve scraper response (url=" + userUrl + ")", ex);
            }
            logger.error("Scraper request was failed", ex);
            throw new ScraperException(ERROR.key(INTERNAL_SERVICE_ERROR),
                    "Scraper request was failed", ex);
        }

        if (!scraperResponse.getStatusCode().is2xxSuccessful()) {
            throw new ScraperException(ERROR.key(INTERNAL_SERVICE_ERROR),
                    "Scraper response is not 200 (url=" + userUrl + ")");
        }
        return scraperResponse.getBody();
    }

    /**
     * Normalizes the user URL and builds the scraper request URI carrying it as the {@code url} parameter.
     */
    private URI buildRequestUri(String userUrl) {
        try {
            URL targetUrl = URLHelper.normalizeUrl(userUrl);
            return new URIBuilder(scraperContentUri)
                    .addParameter("url", targetUrl.toString())
                    .build();
        } catch (URISyntaxException | IllegalArgumentException ex) {
            throw new ScraperBadUrlException(ERROR.key(URL_IS_MALFORMED_MSG), "Url " + userUrl + " is bad formed", ex);
        }
    }

    /** Tells whether the raw HTTP status code lies in the 4xx (client error) range. */
    private static boolean isClientError(int statusCode) {
        return statusCode >= 400 && statusCode < 500;
    }

}
