package ru.yandex.webmaster3.worker.spamban;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.function.Predicate;

import com.datastax.driver.core.utils.UUIDs;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.commons.lang3.mutable.MutableLong;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;

import ru.yandex.webmaster3.core.data.WebmasterHostId;
import ru.yandex.webmaster3.core.host.service.HostOwnerService;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskState;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.core.worker.task.TaskResult;
import ru.yandex.webmaster3.storage.importanturls.ImportantUrlsYDao;
import ru.yandex.webmaster3.storage.spam.SeedSpamHostPatternsYDao;
import ru.yandex.webmaster3.storage.spam.SpamFilterUtil;
import ru.yandex.webmaster3.worker.PeriodicTask;
import ru.yandex.webmaster3.worker.TaskSchedule;

/**
 * Periodic task that looks for owners with suspiciously many hosts and adds a spam
 * host pattern (a {@code ".<owner hostname>"} suffix rule) for each of them.
 *
 * <p>The task loads the existing rules, walks the hosts reported by
 * {@link ImportantUrlsYDao#forEachHost}, skips hosts already matched by an existing rule,
 * groups the remaining hosts by owner via {@link HostOwnerService} and bans every owner
 * whose host count and average urls-per-host reach the thresholds declared below.
 *
 * <p>The task is currently disabled: {@link #getSchedule()} returns {@code TaskSchedule.never()}.
 *
 * @author avhaliullin
 */
@Component("banSpamOwnersTask")
@Slf4j
@RequiredArgsConstructor(onConstructor_ = @Autowired)
public class BanSpamOwnersTask extends PeriodicTask<PeriodicTaskState> {

    /** Number of distinct hosts accumulated before they are resolved to owners in one request. */
    private static final int OWNERS_REQUEST_BATCH = 1000;
    /** Minimal number of hosts an owner must have to be considered for a ban at all. */
    private static final int HOSTS_FOR_OWNER_SOFT_THRESHOLD = 100;
    /** Number of hosts starting from which the relaxed urls-per-host threshold applies. */
    private static final int HOSTS_FOR_OWNER_HARD_THRESHOLD = 500;
    /** Average urls-per-host required for owners that only passed the soft hosts threshold. */
    private static final int URLS_PER_HOST_SOFT_THRESHOLD = 99;
    /** Average urls-per-host required for owners that passed the hard hosts threshold. */
    private static final int URLS_PER_HOST_HARD_THRESHOLD = 90;

    private final SeedSpamHostPatternsYDao seedSpamHostPatternsYDao;
    private final ImportantUrlsYDao importantUrlsYDao;
    private final HostOwnerService hostOwnerService;

    /**
     * Collects per-owner statistics and adds a ban rule for every owner that exceeds the thresholds.
     *
     * @param runId identifier of this task run (not used by the task itself)
     * @return a result with {@link TaskResult#SUCCESS} when the scan and rule creation completed
     * @throws Exception if any of the underlying storage or service calls fails
     */
    @Override
    public Result run(UUID runId) throws Exception {
        Set<String> currentRules = new HashSet<>();
        seedSpamHostPatternsYDao.getAllRules(pair -> currentRules.add(pair.getRight()));
        log.info("Loaded {} rules", currentRules.size());
        Predicate<WebmasterHostId> currentFilter = SpamFilterUtil.makeIsBannedPredicateForSuffixes(currentRules);

        // Current batch: host -> number of times the callback below was invoked for it.
        Map<WebmasterHostId, MutableInt> host2UrlsCount = new HashMap<>();
        Map<WebmasterHostId, OwnerStat> ownerStats = new HashMap<>();
        MutableLong totalUrls = new MutableLong(0L);
        MutableLong totalHosts = new MutableLong(0L);
        // NOTE(review): the callback appears to be invoked once per important url - confirm against the DAO.
        importantUrlsYDao.forEachHost(host -> {
            if (currentFilter.test(host)) { // already banned, not interesting
                return;
            }
            // Flush only when a host that is not yet in the batch arrives, so the counter of a host
            // that is already being accumulated is never split by the flush itself.
            if (host2UrlsCount.size() >= OWNERS_REQUEST_BATCH && !host2UrlsCount.containsKey(host)) {
                totalHosts.add(host2UrlsCount.size());
                flushHosts(host2UrlsCount, ownerStats);
            }
            host2UrlsCount.computeIfAbsent(host, ign -> new MutableInt(0)).increment();
            if (totalUrls.incrementAndGet() % 10000L == 0L) {
                log.info("Processed {} urls", totalUrls.getValue());
            }
        });
        // The last (possibly partial) batch must be counted as well, otherwise the total below
        // misses every host that was still pending when the iteration ended.
        totalHosts.add(host2UrlsCount.size());
        flushHosts(host2UrlsCount, ownerStats);
        log.info("Loaded {} owners with {} hosts and {} urls", ownerStats.size(), totalHosts.getValue(), totalUrls.getValue());

        long hostsToBan = 0L;
        long urlsToBan = 0L;
        List<WebmasterHostId> toBan = new ArrayList<>();
        for (Map.Entry<WebmasterHostId, OwnerStat> entry : ownerStats.entrySet()) {
            OwnerStat stat = entry.getValue();
            if (stat.hosts >= HOSTS_FOR_OWNER_SOFT_THRESHOLD) {
                // Integer average; stat.hosts is at least the soft threshold here, so it is never zero.
                int urlsPerHost = stat.urls / stat.hosts;
                boolean ban = urlsPerHost >= URLS_PER_HOST_SOFT_THRESHOLD ||
                        (stat.hosts >= HOSTS_FOR_OWNER_HARD_THRESHOLD && urlsPerHost >= URLS_PER_HOST_HARD_THRESHOLD);
                if (ban) {
                    toBan.add(entry.getKey());
                    hostsToBan += stat.hosts;
                    urlsToBan += stat.urls;
                }
            }
        }
        log.info("Found {} new owners to ban with {} hosts and {} urls", toBan.size(), hostsToBan, urlsToBan);
        for (WebmasterHostId owner : toBan) {
            UUID ruleId = UUIDs.timeBased();
            // Suffix rule: a leading dot followed by the owner's punycode hostname.
            String rule = "." + owner.getPunycodeHostname();
            log.info("Adding rule {} for {}", ruleId, rule);
            seedSpamHostPatternsYDao.addRule(ruleId, rule);
        }
        return new Result(TaskResult.SUCCESS);
    }

    /**
     * Resolves the owners of all hosts in the current batch, merges the batch into the
     * per-owner statistics and clears the batch.
     *
     * @param host2UrlsCount current batch (host -> urls count); emptied by this call
     * @param ownerStats     accumulated per-owner statistics, updated in place
     */
    private void flushHosts(Map<WebmasterHostId, MutableInt> host2UrlsCount, Map<WebmasterHostId, OwnerStat> ownerStats) {
        if (host2UrlsCount.isEmpty()) {
            return;
        }
        Map<WebmasterHostId, WebmasterHostId> host2Owner = hostOwnerService.mapHostsToOwners(new ArrayList<>(host2UrlsCount.keySet()));
        for (Map.Entry<WebmasterHostId, MutableInt> hostStatEntry : host2UrlsCount.entrySet()) {
            WebmasterHostId owner = host2Owner.get(hostStatEntry.getKey());
            if (owner == null) {
                // Defensive: without this guard hosts with no resolved owner would be merged under
                // a single null key and could later fail with an NPE while building the ban rule.
                log.warn("No owner resolved for host {}, skipping", hostStatEntry.getKey());
                continue;
            }
            ownerStats.computeIfAbsent(owner, ign -> new OwnerStat()).addUrlsFromHost(hostStatEntry.getValue().getValue());
        }
        host2UrlsCount.clear();
    }

    /** Mutable accumulator of hosts and urls seen for a single owner. */
    private static class OwnerStat {
        int hosts = 0;
        int urls = 0;

        /**
         * Registers one more host of this owner.
         *
         * @param urlsCount number of urls counted for that host
         */
        void addUrlsFromHost(int urlsCount) {
            hosts++;
            urls += urlsCount;
        }
    }

    @Override
    public PeriodicTaskType getType() {
        return PeriodicTaskType.BAN_SPAM_OWNERS_TASK;
    }

    /**
     * @return the task schedule; the task is currently disabled and never started automatically
     */
    @Override
    public TaskSchedule getSchedule() {
        // Previously used schedule, kept for reference:
        //return TaskSchedule.startByCron("13 31 22 * * *");
        return TaskSchedule.never();
    }
}
