package ru.yandex.webmaster3.storage.robotstxt;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import ru.yandex.webmaster3.core.WebmasterException;
import ru.yandex.webmaster3.core.data.HttpResponsePart;
import ru.yandex.webmaster3.core.http.WebmasterErrorResponse;
import ru.yandex.webmaster3.core.robotstxt.AllowInfo;
import ru.yandex.webmaster3.core.robotstxt.AnalysisResult;
import ru.yandex.webmaster3.core.robotstxt.ErrorInfo;
import ru.yandex.webmaster3.core.robotstxt.FormatErrorType;
import ru.yandex.webmaster3.core.util.UrlUtils;
import ru.yandex.webmaster3.core.util.WwwUtil;
import ru.yandex.webmaster3.core.zora.GoZoraService;
import ru.yandex.webmaster3.core.zora.ZoraConversionUtil;
import ru.yandex.webmaster3.core.zora.data.response.ZoraUrlFetchResponse;
import ru.yandex.webmaster3.core.zora.go_data.request.GoZoraRequest;
import ru.yandex.webmaster3.storage.robotstxt.exception.AnalisisRobotsTxtException;
import ru.yandex.webmaster3.storage.robotstxt.exception.AnalisisRobotsTxtHttpResponseException;
import ru.yandex.webmaster3.storage.robotstxt.model.AnalyzeRobotsTxtErrorType;
import ru.yandex.webmaster3.storage.robotstxt.model.AnalyzeRobotsTxtInputData;
import ru.yandex.webmaster3.storage.robotstxt.model.AnalyzeRobotsTxtResult;
import ru.yandex.webmaster3.storage.robotstxt.model.RobotsTxtUrlAllowInfo;
import ru.yandex.webmaster3.storage.robotstxt.model.UrlAllowInfo;
import ru.yandex.webmaster3.storage.util.W3DispatcherHttpService;
import ru.yandex.wmtools.common.error.InternalException;
import ru.yandex.webmaster3.core.util.http.YandexMimeType;
import ru.yandex.wmtools.common.util.URLUtil;
import ru.yandex.wmtools.common.util.http.YandexHttpStatus;


/**
 * @author: ishalaru
 * DATE: 20.05.2019
 * <p>
 * Service that runs the robots.txt analysis process.
 */
@Slf4j
@Service
@RequiredArgsConstructor(onConstructor_ = {@Autowired})
public class AnalyzeRobotsTxtService {
    /** Longest URL, in characters, that is passed on for checking; longer ones are reported as too long. */
    public static final int MAX_URL_LENGTH = 1024;
    /** Largest number of URLs accepted in a single analysis request. */
    public static final int MAX_URLS_COUNT = 100;
    /** robots.txt length (compared against {@link String#length()}) above which a "robots huge" error is reported. */
    public static final int MAX_ROBOTS_TXT_SIZE = 1024 * 500;

    /** Path that is resolved against the host URL to locate the robots.txt file. */
    private static final String ROBOTS_TXT_PATH = "/robots.txt";

    /** Used to download robots.txt from the host. */
    private final GoZoraService goZoraService;
    /** Used to run the actual robots.txt analysis for the prepared URLs. */
    private final W3DispatcherHttpService w3dispatcherHttpService;


    /**
     * Analyzes a robots.txt file and checks the requested URLs against it.
     * <p>
     * The robots.txt text is taken from the request. When the request has no text but has a host name,
     * the file is downloaded from that host. In load-only mode the method returns as soon as the text is known.
     * Otherwise the text and the valid URLs are sent to the dispatcher, and its answer is merged with the
     * locally rejected URLs into the final result.
     *
     * @param request host name, optional robots.txt text, URLs to check and the load-only flag
     * @return the robots.txt text alone (load-only mode), or the text together with parse errors,
     * accepted line sections and a per-URL verdict
     * @throws WebmasterException                     when both host name and robots.txt text are missing,
     *                                                or when the host name cannot be parsed
     * @throws AnalisisRobotsTxtException             when too many URLs are passed or robots.txt cannot be downloaded
     * @throws AnalisisRobotsTxtHttpResponseException when the download ends with a redirect, a non-2xx status
     *                                                or an empty body
     */
    public AnalyzeRobotsTxtResult process(AnalyzeRobotsTxtInputData request) throws WebmasterException {
        String hostName = StringUtils.trimToEmpty(request.getHostName());
        String robotsTxtContent = StringUtils.trimToEmpty(request.getRobotsTxtContent());
        String readableRobotsTxtContent = null;

        boolean loadOnly = request.isLoadOnly();
        String[] urls = request.getUrlsAsArray();
        if (urls.length > MAX_URLS_COUNT) {
            throw new AnalisisRobotsTxtException(AnalyzeRobotsTxtErrorType.ANALYZE_ROBOTSTXT__TOO_MANY_URLS);
        }

        // NOTE: the original author marked the host-name handling below as a candidate to be cut out.
        URL hostUrl = null;
        if (StringUtils.isEmpty(hostName)) {
            if (StringUtils.isEmpty(robotsTxtContent)) {
                throw new WebmasterException("Url and robots txt are missing",
                        new WebmasterErrorResponse.IllegalParameterValueResponse(this.getClass(), "hostName", null));
            }
        } else {
            try {
                hostUrl = UrlUtils.prepareUrl(hostName, true);
            } catch (IllegalArgumentException e) {
                log.error("Unable to parse hostname: {}", hostName, e);
                // The message describes the real failure instead of repeating the "missing" text from above.
                throw new WebmasterException("Unable to parse hostname: " + hostName,
                        new WebmasterErrorResponse.IllegalParameterValueResponse(this.getClass(), "hostName",
                                hostName));
            }
        }

        // No text supplied: download robots.txt from the host.
        if (robotsTxtContent.isEmpty() && hostUrl != null) {
            ZoraUrlFetchResponse robotsTxtResponse = getRobotsTxtContent(hostUrl);
            YandexHttpStatus extStatus = robotsTxtResponse.getExtendedHttpStatus();
            if (YandexHttpStatus.isExtErrors(extStatus)) {
                log.error("Got error http status: {}", extStatus);
                throw new AnalisisRobotsTxtException(AnalyzeRobotsTxtErrorType.ANALYZE_ROBOTSTXT__UNABLE_TO_DOWNLOAD_ROBOTS_TXT);
            }

            try {
                robotsTxtContent = robotsTxtResponse.getDocumentContentAsString();
            } catch (Exception e) {
                log.error("Failed to parse robots.txt", e);
                throw new AnalisisRobotsTxtException(AnalyzeRobotsTxtErrorType.ANALYZE_ROBOTSTXT__UNABLE_TO_DOWNLOAD_ROBOTS_TXT);
            }

            HttpResponsePart httpResponse = HttpResponsePart.createFromZoraResponse(robotsTxtResponse, robotsTxtContent);
            if (YandexHttpStatus.is3XX(extStatus)) {
                throw new AnalisisRobotsTxtHttpResponseException(AnalyzeRobotsTxtErrorType.ANALYZE_ROBOTSTXT__REDIRECT_NOT_SUPPORTED, httpResponse);
            } else if (!YandexHttpStatus.is200Or2XXX(extStatus)) {
                throw new AnalisisRobotsTxtHttpResponseException(AnalyzeRobotsTxtErrorType.ANALYZE_ROBOTSTXT__ILLEGAL_HTTP_CODE, httpResponse);
            } else if (StringUtils.isEmpty(robotsTxtContent)) {
                throw new AnalisisRobotsTxtHttpResponseException(AnalyzeRobotsTxtErrorType.ANALYZE_ROBOTSTXT__EMPTY_ROBOTS_TXT, httpResponse);
            }

            // For text/html bodies additionally decode with the response charset; that copy is used for display only.
            if (robotsTxtResponse.getMimeType() == YandexMimeType.MIME_TEXT
                    || robotsTxtResponse.getMimeType() == YandexMimeType.MIME_HTML) {
                Charset charset = robotsTxtResponse.getCharsetJava();
                try {
                    readableRobotsTxtContent = robotsTxtResponse.getDocumentContentAsString(charset);
                } catch (IOException | InternalException e) {
                    // Best effort: fall back to the default-decoded content below.
                    log.error("Unable to get response body", e);
                }
            }
        }

        if (loadOnly) {
            return new AnalyzeRobotsTxtResult(robotsTxtContent);
        }

        // Slots of locally rejected URLs are filled by getValidUrls; the remaining slots stay null for now.
        UrlAllowInfo[] allowInfos = new UrlAllowInfo[urls.length];
        List<String> validUrls = getValidUrls(hostUrl, urls, allowInfos);

        AnalysisResult result = w3dispatcherHttpService.analyzeRobotsTxt(robotsTxtContent, validUrls);

        final List<ErrorInfo> errorInfos;
        if (robotsTxtContent.length() > MAX_ROBOTS_TXT_SIZE) {
            // Report the "too large" error on the line where the size limit is crossed.
            String essentialRobotsTxtContent = robotsTxtContent.substring(0, MAX_ROBOTS_TXT_SIZE);
            int failLine = StringUtils.countMatches(essentialRobotsTxtContent, "\n") + 1;
            errorInfos = addToRightPosition(result.getErrors(), new ErrorInfo(FormatErrorType.ERR_ROBOTS_HUGE,
                    failLine));
        } else {
            errorInfos = result.getErrors();
        }

        // Put each dispatcher verdict into the next free slot; occupied slots belong to rejected URLs.
        int j = 0;
        for (AllowInfo info : result.getAreAllowed()) {
            while (j < allowInfos.length && allowInfos[j] != null) {
                j++;
            }
            if (j >= allowInfos.length) {
                // More verdicts than URLs that were sent: do not run past the end of the array.
                log.error("Robots.txt analyzer returned more verdicts than urls were sent: urls={}, validUrls={}",
                        urls.length, validUrls.size());
                break;
            }

            if (info.isSyntaxError()) {
                allowInfos[j] = new UrlAllowInfo(urls[j], UrlAllowInfo.Error.URL_SYNTAX_ERROR);
            } else {
                allowInfos[j] = new UrlAllowInfo(urls[j], info);
            }
            j++;
        }

        String content = readableRobotsTxtContent != null ? readableRobotsTxtContent : robotsTxtContent;
        return createNormalResponse(content, errorInfos, result, allowInfos);
    }

    /**
     * Fetches the robots.txt of the given host through the GoZora service.
     *
     * @param host URL of the host whose robots.txt is requested
     * @return the fetch response converted to {@link ZoraUrlFetchResponse}
     * @throws WebmasterException when the robots.txt URL cannot be built from {@code host}
     */
    protected ZoraUrlFetchResponse getRobotsTxtContent(final URL host) throws WebmasterException {
        final String robotsTxtAddress;
        try {
            robotsTxtAddress = getRobotsTxtUrl(host).toString();
        } catch (MalformedURLException e) {
            log.debug("Unable to get robots.txt content", e);
            final String reason = "Unable to create robots.txt url";
            throw new WebmasterException(reason,
                    new WebmasterErrorResponse.InternalUnknownErrorResponse(this.getClass(), reason), e);
        }

        return ZoraConversionUtil.toUrlFetchResponse(
                goZoraService.executeRequestFetchResponse(GoZoraRequest.of(robotsTxtAddress)));
    }

    /**
     * Builds the robots.txt location by resolving {@link #ROBOTS_TXT_PATH} against the given URL.
     *
     * @param url base URL of the host
     * @return URL of the host's robots.txt
     * @throws MalformedURLException when the combined URL cannot be constructed
     */
    private static URL getRobotsTxtUrl(final URL url) throws MalformedURLException {
        final URL robotsTxtLocation = new URL(url, ROBOTS_TXT_PATH);
        return robotsTxtLocation;
    }

    /**
     * Splits the requested URLs into those that can be checked and those rejected up front.
     * <p>
     * For every rejected URL the matching slot of {@code allowInfos} is filled with an error entry.
     * For every accepted URL the value to check (the URL itself when no host is known, otherwise its
     * path-and-query part) is appended to the returned list, in input order.
     * <p>
     * Side effect: when an absolute URL differs from the host only by the {@code www} prefix, the entry in
     * {@code urls} is replaced with its www-switched form.
     *
     * @param hostname   parsed host URL, or {@code null} when the request carried no host name
     * @param urls       URLs from the request; may be modified in place (see above)
     * @param allowInfos output array of the same length as {@code urls}; only slots of rejected URLs are written
     * @return values to pass to the robots.txt analyzer, one per accepted URL
     */
    private static List<String> getValidUrls(URL hostname, String[] urls, UrlAllowInfo[] allowInfos) {
        List<String> result = new ArrayList<>(urls.length);
        // The host name of the site does not depend on the URL being checked, so compute it once.
        final String fullHostName = hostname == null ? null : URLUtil.getHostName(hostname, true);

        // Fill allowInfos for invalid urls and add valid urls to the result list.
        for (int i = 0; i < urls.length; i++) {
            if (urls[i].length() > MAX_URL_LENGTH) {
                allowInfos[i] = new UrlAllowInfo(urls[i], UrlAllowInfo.Error.ROBOTSTXT_URL_TOO_LONG);
                continue;
            }

            if (hostname == null) {
                // Without a host only relative urls can be checked.
                if (!isRelativeUrl(urls[i])) {
                    allowInfos[i] = new UrlAllowInfo(urls[i], UrlAllowInfo.Error.NO_HOSTNAME);
                    continue;
                }
                result.add(urls[i]);
                continue;
            }

            URL url;
            try {
                url = urlFromString(hostname, urls[i]);
            } catch (IllegalArgumentException e) {
                allowInfos[i] = new UrlAllowInfo(urls[i], UrlAllowInfo.Error.URL_SYNTAX_ERROR);
                continue;
            }

            String fullUrlHostName = URLUtil.getHostName(url, true);
            boolean equalsIgnoreWww = WwwUtil.equalsIgnoreWww(fullHostName, fullUrlHostName);
            boolean equalsExact = fullHostName.equalsIgnoreCase(fullUrlHostName);
            if (!equalsIgnoreWww) {
                allowInfos[i] = new UrlAllowInfo(urls[i], UrlAllowInfo.Error.WRONG_DOMAIN);
                continue;
            } else if (!isRelativeUrl(urls[i]) && !equalsExact) {
                // Same site, different www prefix: rewrite the caller-visible entry to the switched form.
                urls[i] = WwwUtil.switchWWW(urls[i]);
            }

            /*
             * robots.txt parser assumes '/' always follows hostname.
             * So we add '/' at the end when url path is empty.
             */
            String file = url.getFile();
            result.add(StringUtils.isEmpty(file) ? "/" : file);
        }

        return result;
    }

    /**
     * Tells whether the given URL is host-relative, i.e. begins with a slash.
     *
     * @param url URL text to inspect
     * @return {@code true} when the first character is {@code '/'}
     */
    private static boolean isRelativeUrl(String url) {
        return !url.isEmpty() && url.charAt(0) == '/';
    }

    /**
     * Turns a user-supplied URL into an absolute one and parses it.
     * <p>
     * A host-relative URL gets the protocol and authority of {@code hostName} prepended; a URL without
     * {@code "://"} gets only the protocol prepended; anything else is parsed as given.
     *
     * @param hostName host URL that supplies the missing protocol/authority
     * @param url      URL text from the request
     * @return the parsed URL
     * @throws IllegalArgumentException when the resulting text is rejected by the URL parser
     */
    private static URL urlFromString(URL hostName, String url) throws IllegalArgumentException {
        final String absolute;
        if (isRelativeUrl(url)) {
            absolute = hostName.getProtocol() + "://" + hostName.getAuthority() + url;
        } else if (url.contains("://")) {
            absolute = url;
        } else {
            absolute = hostName.getProtocol() + "://" + url;
        }

        return UrlUtils.doPrepareUrl(absolute, true, false, false);
    }

    /**
     * Returns a copy of {@code errorInfos} with {@code errorInfo} inserted in front of the first entry
     * whose line number is greater than its own, or appended when there is no such entry.
     * The input list is not modified.
     *
     * @param errorInfos existing errors
     * @param errorInfo  error to insert
     * @return new list containing all existing errors plus the inserted one
     */
    private static List<ErrorInfo> addToRightPosition(List<ErrorInfo> errorInfos, ErrorInfo errorInfo) {
        // Count the leading entries that must stay in front of the new error.
        int insertAt = 0;
        for (ErrorInfo existing : errorInfos) {
            if (existing.getLineNumber() > errorInfo.getLineNumber()) {
                break;
            }
            insertAt++;
        }

        List<ErrorInfo> merged = new ArrayList<>(errorInfos.size() + 1);
        merged.addAll(errorInfos);
        merged.add(insertAt, errorInfo);
        return merged;
    }

    /**
     * Assembles the full analysis result from the robots.txt text and the analyzer output.
     * <p>
     * Produces three lists: parse errors with the text of the offending line, accepted lines grouped into
     * sections of consecutive line numbers, and one verdict per requested URL.
     * Line texts are looked up with {@link #getLine(String, int)}, which scans the text from its start on each call.
     *
     * @param robotsTxtContent robots.txt text used for line lookups and returned in the result
     * @param errorInfos       parse errors to report
     * @param result           analyzer output supplying the accepted line numbers
     * @param urlAllowInfos    per-URL verdicts; an entry either carries allow info or only an error
     * @return the assembled result
     */
    private static AnalyzeRobotsTxtResult createNormalResponse(final String robotsTxtContent,
                                                               List<ErrorInfo> errorInfos, AnalysisResult result,
                                                               UrlAllowInfo[] urlAllowInfos) {
        // Parse errors, each with the text of its line.
        List<AnalyzeRobotsTxtResult.RobotsTxtErrorInfo> parseErrors = new ArrayList<>(errorInfos.size());
        for (ErrorInfo parseError : errorInfos) {
            String sourceLine = getLine(robotsTxtContent, (int) parseError.getLineNumber());
            parseErrors.add(new AnalyzeRobotsTxtResult.RobotsTxtErrorInfo(parseError.getType(),
                    parseError.getLineNumber(),
                    sourceLine));
        }

        // Accepted lines: a new section starts whenever the line number is not previous + 1.
        List<AnalyzeRobotsTxtResult.RobotsTxtSection> acceptedLines = new ArrayList<>();
        AnalyzeRobotsTxtResult.RobotsTxtSection openSection = new AnalyzeRobotsTxtResult.RobotsTxtSection();
        boolean first = true;
        long previous = 0;
        for (long lineNumber : result.getAcceptedLines()) {
            boolean continuesRun = !first && lineNumber == previous + 1;
            if (!continuesRun) {
                if (!openSection.getLines().isEmpty()) {
                    acceptedLines.add(openSection);
                }
                openSection = new AnalyzeRobotsTxtResult.RobotsTxtSection();
            }
            openSection.addLine(new AnalyzeRobotsTxtResult.RobotsTxtLine((int) lineNumber,
                    getLine(robotsTxtContent, (int) lineNumber)));
            previous = lineNumber;
            first = false;
        }
        if (!openSection.getLines().isEmpty()) {
            acceptedLines.add(openSection);
        }

        // Per-URL verdicts: entries without allow info carry only the url and the error.
        List<RobotsTxtUrlAllowInfo> allowInfos = new ArrayList<>();
        for (UrlAllowInfo verdict : urlAllowInfos) {
            if (verdict.getAllowInfo() == null) {
                allowInfos.add(new RobotsTxtUrlAllowInfo(
                        verdict.getUrl(),
                        verdict.getError(),
                        null,
                        null,
                        null));
                continue;
            }
            allowInfos.add(new RobotsTxtUrlAllowInfo(
                    verdict.getUrl(),
                    verdict.getError(),
                    verdict.getAllowInfo().isAllowed(),
                    verdict.getAllowInfo().getRule(),
                    verdict.getAllowInfo().getChangedUrl()));
        }

        return new AnalyzeRobotsTxtResult(robotsTxtContent, parseErrors, acceptedLines, allowInfos);
    }

    /**
     * Returns the text of the line with the given 1-based number, without its line terminator.
     * <p>
     * {@code "\n"}, {@code "\r"} and {@code "\r\n"} each count as a single terminator. The text after the last
     * terminator is a line too, even when it is empty, so an empty text has exactly one (empty) line.
     *
     * @param text text to read the line from
     * @param line 1-based line number
     * @return the requested line, or {@code null} when {@code line} is less than 1 or beyond the last line
     */
    public static String getLine(String text, int line) {
        if (line < 1) {
            // Non-positive line numbers never exist, whatever the text contains.
            return null;
        }

        final int length = text.length();
        int lineStart = 0;
        int currentLine = 1;
        int i = 0;
        while (i < length) {
            char ch = text.charAt(i);
            if (ch != '\r' && ch != '\n') {
                i++;
                continue;
            }
            if (currentLine == line) {
                return text.substring(lineStart, i);
            }
            // Treat "\r\n" as one terminator by skipping the '\n' that follows the '\r'.
            if (ch == '\r' && i + 1 < length && text.charAt(i + 1) == '\n') {
                i++;
            }
            i++;
            lineStart = i;
            currentLine++;
        }

        // The remainder after the last terminator is the final line.
        return currentLine == line ? text.substring(lineStart) : null;
    }
}
