package ru.yandex.wmconsole.service;

import java.io.IOException;
import java.io.Reader;
import java.net.URL;

import org.apache.http.Consts;
import org.cyberneko.html.parsers.SAXParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Required;
import org.xml.sax.InputSource;

import ru.yandex.webmaster.common.util.xml.CompositeContentHandler;
import ru.yandex.wmconsole.data.DelUrlRequest;
import ru.yandex.wmconsole.service.error.WMCUserProblem;
import ru.yandex.wmconsole.verification.CheckMetaTagContentHandler;
import ru.yandex.wmtools.common.error.InternalException;
import ru.yandex.wmtools.common.error.UserException;
import ru.yandex.wmtools.common.service.IndexInfoService;
import ru.yandex.wmtools.common.sita.DocumentFormatEnum;
import ru.yandex.wmtools.common.sita.RobotsTxtFormatEnum;
import ru.yandex.wmtools.common.sita.SitaService;
import ru.yandex.wmtools.common.sita.SitaUrlFetchRequest;
import ru.yandex.wmtools.common.sita.SitaUrlFetchRequestBuilder;
import ru.yandex.wmtools.common.sita.SitaUrlFetchResponse;
import ru.yandex.wmtools.common.sita.UserAgentEnum;
import ru.yandex.wmtools.common.util.http.YandexHttpStatus;

/**
 * Checks if url should be deleted from index and passes it to the robot if necessary.
 *
 * <p>A deletion request is accepted only when the url is reported as indexed by at least one of
 * the index checks (the base check or its {@code Tr}/{@code Com} variants) and there is a reason
 * to drop it: the fetch returns 403/404/410, the url is disallowed in robots.txt, or the fetched
 * document carries one of the recognised {@code noindex}/{@code none} meta tags.
 *
 * @author ailyin
 */
public class DelUrlService extends AbstractUrlService {
    private static final Logger log = LoggerFactory.getLogger(DelUrlService.class);

    private DelUrlLogService delUrlLogService;
    private SitaService sitaService;

    /**
     * Combines the base index check with its {@code Tr} and {@code Com} variants.
     *
     * <p>The checks run in order and stop at the first one that reports
     * {@link IndexInfoService.IndexedResult#INDEXED}, so later checks are skipped once the url
     * is known to be indexed.
     *
     * @param url url to look up
     * @return {@code INDEXED} if any check reports it; otherwise {@code FAKE} if any check
     *         reports the url as fake; otherwise {@code NOT_INDEXED}
     * @throws UserException     propagated from the underlying checks
     * @throws InternalException propagated from the underlying checks
     */
    @Override
    protected IndexInfoService.IndexedResult checkIfIndexed(URL url) throws UserException, InternalException {
        // First: check is url indexed
        IndexInfoService.IndexedResult resRu = super.checkIfIndexed(url);
        if (IndexInfoService.IndexedResult.INDEXED == resRu) {
            return resRu;
        }

        IndexInfoService.IndexedResult resTr = super.checkIfIndexedTr(url);
        if (IndexInfoService.IndexedResult.INDEXED == resTr) {
            return resTr;
        }

        IndexInfoService.IndexedResult resCom = super.checkIfIndexedCom(url);
        if (IndexInfoService.IndexedResult.INDEXED == resCom) {
            return resCom;
        }

        // Second: check url is fake if it isn't indexed
        if (IndexInfoService.IndexedResult.FAKE == resRu
                || IndexInfoService.IndexedResult.FAKE == resTr
                || IndexInfoService.IndexedResult.FAKE == resCom) {
            return IndexInfoService.IndexedResult.FAKE;
        }

        return IndexInfoService.IndexedResult.NOT_INDEXED;
    }

    /**
     * Validates a deletion request and, if it is justified, hands it to the deletion log service.
     *
     * @param delUrlRequest request describing the url to delete
     * @throws UserException     with {@code URL_IS_NOT_INDEXED} or {@code URL_IS_FAKE} when the
     *                           index check rejects the url, {@code SERVER_IS_NOT_AVAILABLE} when
     *                           the fetch yields no http status, or
     *                           {@code NO_REASONS_TO_DELETE_URL} when nothing justifies deletion
     * @throws InternalException propagated from the index check, the fetch or the log service
     */
    public void delUrl(DelUrlRequest delUrlRequest) throws UserException, InternalException {
        final URL url = delUrlRequest.getUrl();

        IndexInfoService.IndexedResult indexedResult = checkIfIndexed(url);
        if (indexedResult == IndexInfoService.IndexedResult.NOT_INDEXED) {
            throw new UserException(WMCUserProblem.URL_IS_NOT_INDEXED, "Url " + url + " is not indexed.");
        }

        if (indexedResult == IndexInfoService.IndexedResult.FAKE) {
            throw new UserException(WMCUserProblem.URL_IS_FAKE, "Url " + url + " is fake.");
        }

        if (!shouldDelete(delUrlRequest)) {
            throw new UserException(WMCUserProblem.NO_REASONS_TO_DELETE_URL,
                    "Url " + url + " isn't disallowed and hasn't returned a proper http code.");
        }

        delUrlLogService.logUrl(delUrlRequest);
    }

    /**
     * Fetches the url through Sita and decides whether there is a reason to delete it.
     *
     * @param delUrlRequest request describing the url to examine
     * @return {@code true} if the http status is 403, 404 or 410, if the url is reported as not
     *         allowed in robots.txt, or if the document contains a recognised meta tag;
     *         {@code false} otherwise, including when the document cannot be read or parsed
     * @throws UserException     with {@code SERVER_IS_NOT_AVAILABLE} when the response carries no
     *                           http status
     * @throws InternalException propagated from the Sita request
     */
    private boolean shouldDelete(DelUrlRequest delUrlRequest) throws UserException, InternalException {
        final URL url = delUrlRequest.getUrl();

        SitaUrlFetchRequest sitaUrlFetchRequest = new SitaUrlFetchRequestBuilder(url)
                .setDocumentFormat(DocumentFormatEnum.DF_HTTP_RESPONSE)
                .setCheckAllowedInRobotsTxt(true)
                .setRobotsTxtFormat(RobotsTxtFormatEnum.RF_NO_ROBOTS_TXT)
                .setUserAgent(UserAgentEnum.ROBOT)
                .createSitaUrlFetchRequest();

        SitaUrlFetchResponse sitaUrlFetchResponse = sitaService.request(sitaUrlFetchRequest);

        YandexHttpStatus sitaHttpStatus = sitaUrlFetchResponse.getSitaHttpStatus();
        if (sitaHttpStatus == null) {
            throw new UserException(WMCUserProblem.SERVER_IS_NOT_AVAILABLE,
                    "Url " + url + " is not available.");
        }

        if (sitaHttpStatus == YandexHttpStatus.HTTP_403_FORBIDDEN
                || sitaHttpStatus == YandexHttpStatus.HTTP_404_NOT_FOUND
                || sitaHttpStatus == YandexHttpStatus.HTTP_410_GONE) {
            return true;
        }

        // The flag is nullable; only an explicit "not allowed" answer counts as a reason.
        if (Boolean.FALSE.equals(sitaUrlFetchResponse.isAllowedInRobotsTxt())) {
            return true;
        }

        Reader documentContent = null;
        try {
            documentContent = sitaUrlFetchResponse.getDocumentContent(Consts.ISO_8859_1);
            return documentContent != null && checkMetaTag(documentContent);
        } catch (Exception e) {
            // Best effort: an unreadable or unparsable document simply gives no reason to delete.
            log.error("Unable to parse document content", e);
            return false;
        } finally {
            // Release the reader on every path, including parse failures.
            closeQuietly(documentContent);
        }
    }

    /**
     * Closes the reader, logging instead of propagating a failure so that it cannot change the
     * outcome of the deletion decision.
     *
     * @param reader reader to close, may be {@code null}
     */
    private static void closeQuietly(Reader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (IOException e) {
            log.warn("Unable to close document content reader", e);
        }
    }

    /**
     * Parses the document as HTML and looks for meta tags that forbid indexing.
     *
     * <p>Three handlers are registered: {@code robots}/{@code noindex},
     * {@code robots}/{@code none} and {@code yandex}/{@code noindex}. The matching rules
     * themselves live in {@link CheckMetaTagContentHandler}.
     *
     * @param document document content to parse; not closed by this method
     * @return {@code true} if at least one of the handlers reports a match
     * @throws Exception if the parser fails to read or parse the document
     */
    boolean checkMetaTag(Reader document) throws Exception {
        CheckMetaTagContentHandler metaTagNoIndexHandler = new CheckMetaTagContentHandler("robots", "noindex", false);
        CheckMetaTagContentHandler metaTagYandexNoIndexHandler = new CheckMetaTagContentHandler("yandex", "noindex", false);
        CheckMetaTagContentHandler metaTagNoneHandler = new CheckMetaTagContentHandler("robots", "none", false);

        SAXParser saxParser = new SAXParser();
        saxParser.setContentHandler(
                new CompositeContentHandler(
                        metaTagNoIndexHandler,
                        metaTagNoneHandler,
                        metaTagYandexNoIndexHandler
                )
        );
        saxParser.parse(new InputSource(document));

        return metaTagNoIndexHandler.isFound() || metaTagNoneHandler.isFound() || metaTagYandexNoIndexHandler.isFound();
    }

    /**
     * Injects the service that records accepted deletion requests.
     *
     * @param delUrlLogService service used by {@link #delUrl(DelUrlRequest)}
     */
    @Required
    public void setDelUrlLogService(DelUrlLogService delUrlLogService) {
        this.delUrlLogService = delUrlLogService;
    }

    /**
     * Injects the Sita client used to fetch the url being examined.
     *
     * @param sitaService Sita client
     */
    @Required
    public void setNewSitaService(SitaService sitaService) {
        this.sitaService = sitaService;
    }
}
