package ru.yandex.webmaster3.core.proto.converter;

import java.net.MalformedURLException;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;

import com.datastax.driver.core.utils.UUIDs;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import org.apache.commons.collections4.CollectionUtils;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ru.yandex.webmaster3.core.WebmasterException;
import ru.yandex.webmaster3.core.data.WebmasterApplicationInfo;
import ru.yandex.webmaster3.core.data.WebmasterHostId;
import ru.yandex.webmaster3.core.http.WebmasterErrorResponse;
import ru.yandex.webmaster3.core.sitemap.UserSitemap;
import ru.yandex.webmaster3.core.sitemap.raw.HostRelatedSitemaps;
import ru.yandex.webmaster3.core.sitemap.raw.RawSitemapInfo;
import ru.yandex.webmaster3.core.util.IdUtils;
import ru.yandex.webmaster3.proto.Sitemap;
import ru.yandex.wmtools.common.SupportedProtocols;

import static ru.yandex.webmaster3.proto.Sitemap.SitemapType.SITEMAP_INVALID;

/**
 * @author aherman
 */
public class SitemapProtoConverter {
    private static final Logger log = LoggerFactory.getLogger(SitemapProtoConverter.class);

    private static final int MAX_SITEMAP_ERRORS_TO_STORE = 25;
    private static final int MAX_SITEMAP_URLS_TO_STORE = 50_000;

    public static RawSitemapInfo convert(String sitemapUrl, Sitemap.SitemapInfo sitemapInfo) {
        sitemapUrl = ConverterUtils.canonicalizeSitemapUrl(sitemapUrl);

        boolean isIndex = false;
        List<String> urls = Collections.emptyList();
        if (sitemapInfo.getType() == Sitemap.SitemapType.SITEMAP_INDEX) {
            isIndex = true;
            urls = sitemapInfo.getUrlsList();
        }

        String redirTarget = sitemapInfo.getRedirTarget();
        if (redirTarget != null && redirTarget.isEmpty()) {
            redirTarget = null;
        }

        // it is important to use getUrlCount() here, not getUrlsCount(),
        // because in case of non index sitemaps urls collection will be empty
        int urlsCount = sitemapInfo.getUrlCount();

        Sitemap.SitemapInfoTimestamps sitemapInfoTimestamps = sitemapInfo.getTimestamps();
        if (sitemapInfoTimestamps != null && sitemapInfoTimestamps.hasLastAccessTimestamp()) {
            DateTime lastAccessTS = new DateTime(sitemapInfoTimestamps.getLastAccessTimestamp() * 1000L);

            // The way Lemur works, if sitemap couldn't be parsed, it might have outdated values for urls attribute
            if (sitemapInfoTimestamps.hasUrlsTimestamp()) {
                DateTime urlsTS = new DateTime(sitemapInfoTimestamps.getUrlsTimestamp() * 1000L);
                if (urlsTS.isBefore(lastAccessTS)) {
                    urls = Collections.emptyList();
                    urlsCount = 0;
                }
            }

            // Same for isIndex
            if (sitemapInfoTimestamps.hasIsIndexTimestamp()) {
                DateTime isIndexTS = new DateTime(sitemapInfoTimestamps.getIsIndexTimestamp() * 1000L);
                if (isIndexTS.isBefore(lastAccessTS)) {
                    isIndex = false;
                }
            }

            // Same for redirect
            if (sitemapInfoTimestamps.hasRedirTargetTimestamp()) {
                DateTime redirectTS = new DateTime(sitemapInfoTimestamps.getRedirTargetTimestamp() * 1000L);
                if (redirectTS.isBefore(lastAccessTS)) {
                    redirTarget = null;
                }
            }

            // As for errors attribute, Lemur ensures that we won't get outdated values
        }

        // isSitemapParsed here means that parser tried to parse sitemap, with errors or without
        boolean isSitemapValid = sitemapInfo.getType() != SITEMAP_INVALID && sitemapInfo.getIsSitemapParsed();
        if (!isSitemapValid) {
            isIndex = false;
            urls = Collections.emptyList();
            urlsCount = 0;
        }

        if (urls.size() > MAX_SITEMAP_URLS_TO_STORE) {
            log.info("SitemapProtoConverter: {} has too many URLs: {}", sitemapUrl, urls.size());
            urls = urls.subList(0, MAX_SITEMAP_URLS_TO_STORE - 1);
        }

        // Lemur sitemaps with redirect might have strange errors, which we don't care about
        int errorsCount = sitemapInfo.getErrorCount();
        List<RawSitemapInfo.SitemapError> errors = getErrors(sitemapInfo.getErrorsList());
        if (redirTarget != null) {
            errors = Collections.emptyList();
            errorsCount = 0;
        }

        return new RawSitemapInfo(sitemapUrl,
                isIndex,
                sitemapInfo.getHttpCode(),
                ConverterUtils.toDateTime(sitemapInfo.getLastAccess()),
                urlsCount,
                urls,
                errorsCount,
                errors,
                redirTarget,
                null);
    }

    private static List<RawSitemapInfo.SitemapError> getErrors(List<Sitemap.SitemapError> errorsList) {
        if (CollectionUtils.isEmpty(errorsList)) {
            return Collections.emptyList();
        }

        List<RawSitemapInfo.SitemapError> result = errorsList.stream().limit(MAX_SITEMAP_ERRORS_TO_STORE)
                .map(s -> {
                    RawSitemapInfo.SitemapError error = new RawSitemapInfo.SitemapError();
                    error.setErrorCode(s.getCode().getNumber());
                    error.setLine(s.getLine());
                    error.setMessage(s.getText());

                    return error;
                })
                .collect(Collectors.toList());

        return result;
    }

    public static HostRelatedSitemaps convert(Sitemap.HostRelatedSitemapMessage message) {
        String hostname = message.getHostname();
        URL url;
        try {
            url = SupportedProtocols.getURL(hostname);
        } catch (MalformedURLException | URISyntaxException | SupportedProtocols.UnsupportedProtocolException e) {
            throw new WebmasterException("Unable to parse url: " + hostname, new WebmasterErrorResponse.InternalUnknownErrorResponse(SitemapProtoConverter.class,
                    "Unable to parse url: " + hostname
            ));
        }
        WebmasterHostId hostId = IdUtils.urlToHostId(url);

        List<Sitemap.RelatedSitemapInfo> relatedSitemapsList = message.getRelatedSitemapsList();
        List<HostRelatedSitemaps.RelatedSitemap> rs = new ArrayList<>(relatedSitemapsList.size());
        HashFunction hashFunction = Hashing.murmur3_128();
        Hasher hasher = hashFunction.newHasher();
        for (Sitemap.RelatedSitemapInfo relatedSitemapInfo : relatedSitemapsList) {
            try {
                String sitemapUrl = ConverterUtils.canonicalizeSitemapUrl(relatedSitemapInfo.getUrl());
                HostRelatedSitemaps.SitemapSource source = toSource(relatedSitemapInfo.getSourceId());
                DateTime addTime = ConverterUtils.toDateTime(relatedSitemapInfo.getAddTime());
                rs.add(new HostRelatedSitemaps.RelatedSitemap(sitemapUrl, source, addTime));
            } catch (Exception e) {
                log.error("Unable to convert sitemap: {}", relatedSitemapInfo.getUrl(), e);
            }
        }
        Iterator<String> it = rs.stream().map(HostRelatedSitemaps.RelatedSitemap::getUrl).sorted().iterator();
        while (it.hasNext()) {
            hasher.putUnencodedChars(it.next());
        }

        return new HostRelatedSitemaps(UUIDs.timeBased(), hostId, DateTime.now(), rs, hasher.hash());
    }

    private static HostRelatedSitemaps.SitemapSource toSource(Sitemap.SitemapSourceId sourceId) {
        switch (sourceId) {
            case ROBOTS_SOURCE_ID: return HostRelatedSitemaps.SitemapSource.ROBOTS_TXT;
            case WMCLOG_SOURCE_ID: return HostRelatedSitemaps.SitemapSource.WEBMASTER;
            case SITEMAPINDEX_SOURCE_ID: return HostRelatedSitemaps.SitemapSource.SITEMAP_INDEX;
            case WMC_ROBOTS_SOURCE_ID: return HostRelatedSitemaps.SitemapSource.ROBOTS_TXT;
            default: return HostRelatedSitemaps.SitemapSource.UNKNOWN;
        }
    }

    public static Sitemap.UserSitemapMessage createUserSitemapRequest(WebmasterApplicationInfo applicationInfo,
                                                                      List<UserSitemap> sitemaps){
        Sitemap.UserSitemapMessage.Builder builder = Sitemap.UserSitemapMessage.newBuilder()
                .setApplicationInfo(ConverterUtils.toApplicationInfo(applicationInfo))
                .setMessageInfo(ConverterUtils.createMessageInfo());
        for (UserSitemap sitemap: sitemaps){
            builder.addSitemaps(createUserSitemapInfo(sitemap));
        }
        return builder.build();
    }

    private static Sitemap.UserSitemapInfo createUserSitemapInfo(UserSitemap userSitemap){
        return Sitemap.UserSitemapInfo.newBuilder()
                .setAddTime(ConverterUtils.fromDateTime(userSitemap.getAddDate()))
                .setSitemapHost(IdUtils.hostIdToUrl(userSitemap.getHostId()))
                .setSitemapUrl(userSitemap.getSitemapUrl())
                .build();
    }
}
