//package ru.yandex.webmaster3.worker.checklist;
//
//import org.apache.commons.lang3.mutable.MutableInt;
//import org.apache.commons.lang3.tuple.Pair;
//import org.joda.time.DateTime;
//import org.slf4j.Logger;
//import org.slf4j.LoggerFactory;
//import org.springframework.beans.factory.annotation.Required;
//import ru.yandex.webmaster3.core.checklist.data.HostMicrodataStats;
//import ru.yandex.webmaster3.core.checklist.data.MicrodataDocStatus;
//import ru.yandex.webmaster3.core.data.WebmasterHostGeneration;
//import ru.yandex.webmaster3.core.data.WebmasterHostId;
//import ru.yandex.webmaster3.core.util.IdUtils;
//import ru.yandex.webmaster3.core.worker.task.WorkerTaskType;
//import ru.yandex.webmaster3.proto.Semantic;
//import ru.yandex.webmaster3.proto.converter.MicrodataStatsConverter;
//import ru.yandex.webmaster3.storage.checklist.dao.MicrodataErrorSamplesCDao;
//import ru.yandex.webmaster3.storage.checklist.data.ProblemSignal;
//import ru.yandex.webmaster3.storage.checklist.data.SiteProblemContent;
//import ru.yandex.webmaster3.core.checklist.data.SiteProblemState;
//import ru.yandex.webmaster3.core.checklist.data.SiteProblemTypeEnum;
//import ru.yandex.webmaster3.storage.checklist.service.SiteProblemsService;
//import ru.yandex.webmaster3.storage.host.dao.HostsCDao;
//import ru.yandex.webmaster3.worker.PeriodicTask;
//import ru.yandex.webmaster3.worker.PeriodicTaskData;
//import ru.yandex.webmaster3.core.worker.task.PeriodicTaskState;
//import ru.yandex.webmaster3.core.worker.task.TaskResult;
//import ru.yandex.webmaster3.worker.TaskSchedule;
//import ru.yandex.webmaster3.worker.yt.WebmasterYtService;
//
//import java.io.ByteArrayInputStream;
//import java.net.URL;
//import java.util.Collections;
//import java.util.EnumMap;
//import java.util.EnumSet;
//import java.util.HashMap;
//import java.util.List;
//import java.util.Map;
//import java.util.Set;
//import java.util.function.Function;
//import java.util.stream.Collectors;
//
///**
// * Periodic worker task that reads per-host microdata validation stats from the YT table
// * {@code ytPathPrefix + TABLE} and, for every host returned by {@code hostsCDao.getHosts(...)},
// * stores URL samples and updates the {@code MICRODATA_ERRORS} site problem.
// * <p>
// * NOTE: every line of this file is commented out, so the class is currently not compiled.
// *
// * @author avhaliullin
// */
//public class UpdateMicrodataStatsTask extends PeriodicTask<UpdateMicrodataStatsTask.TD, PeriodicTaskState> {
//    private static final Logger log = LoggerFactory.getLogger(UpdateMicrodataStatsTask.class);
//
//    // A progress line is logged for every LOGGING_DENSITY-th table row.
//    private static final int LOGGING_DENSITY = 100;
//    // Number of buffered hosts that triggers a write-out of the buffer.
//    private static final int BUFFER_SIZE = 100;
//    // NOTE(review): not referenced anywhere in this class - confirm whether a samples cap was intended.
//    private static final int SAMPLES_LIMIT = 1000;
//    // Table path, appended to ytPathPrefix.
//    private static final String TABLE = "/export/semantic_validator";
//
//    // Document statuses that are counted as "problem" documents.
//    private static final Set<MicrodataDocStatus> PROBLEM_STATUSES = EnumSet.of(MicrodataDocStatus.INVALID, MicrodataDocStatus.WARNING);
//    // The problem is raised when strictly more than this percent of docs with microdata are problem docs.
//    private static final int BAD_DOCS_PERCENT = 5;
//
//    // Dependencies, injected through the @Required setters at the bottom of the class.
//    private HostsCDao hostsCDao;
//    private WebmasterYtService webmasterYtService;
//    private String ytPathPrefix;
//    private SiteProblemsService siteProblemsService;
//    private MicrodataErrorSamplesCDao microdataErrorSamplesCDao;
//
//    /**
//     * Streams the table row by row, turns each row into a problem signal plus URL samples,
//     * and writes them out once {@link #BUFFER_SIZE} hosts have been buffered (and once more
//     * for the remainder after the last row).
//     *
//     * @return a {@code Result} carrying {@code TaskResult.SUCCESS} once all rows are processed
//     * @throws Exception declared by the signature; nothing is caught inside this method
//     */
//    @Override
//    public Result run() throws Exception {
//        // hostId -> (problem signal, URL samples per status); a repeated hostId overwrites the earlier entry.
//        final Map<WebmasterHostId, Pair<ProblemSignal, Map<MicrodataDocStatus, List<String>>>> problemsBuffer = new HashMap<>();
//        webmasterYtService.readBinaryLines(ytPathPrefix + TABLE, (data, valuePositions, lineIndex) -> {
//            {
//                // The "key" column holds the host URL.
//                // NOTE(review): new String(byte[], int, int) decodes with the platform default charset -
//                // consider an explicit charset if this code is re-enabled.
//                WebmasterYtService.ArrayPosition position = valuePositions.get("key");
//                String hostUrl = new String(data, position.getOffset(), position.getLength());
//                WebmasterHostId hostId = IdUtils.urlToHostId(hostUrl);
//                if (lineIndex % LOGGING_DENSITY == 0) {
//                    log.info("Processing host {}, line {}", hostId, lineIndex);
//                }
//                // The "value" column holds a serialized Semantic.HostReportInfo message.
//                WebmasterYtService.ArrayPosition vPos = valuePositions.get("value");
//                HostMicrodataStats info = MicrodataStatsConverter.convertFromProto(
//                        Semantic.HostReportInfo.parseFrom(
//                                new ByteArrayInputStream(data, vPos.getOffset(), vPos.getLength())
//                        ),
//                        PROBLEM_STATUSES
//                );
//                DateTime now = DateTime.now();
//                // Sum of doc counts over every status except ABORTED and NO_MARKUP.
//                long urlsWithMicrodata =
//                        info.getDocsInStatusCount().entrySet().stream().filter(entry -> {
//                            MicrodataDocStatus status = entry.getKey();
//                            return status != MicrodataDocStatus.ABORTED && status != MicrodataDocStatus.NO_MARKUP;
//                        }).collect(Collectors.summarizingLong(Map.Entry::getValue)).getSum();
//                // Sum of doc counts over PROBLEM_STATUSES only.
//                long urlsWithProblems =
//                        info.getDocsInStatusCount().entrySet().stream().filter(entry -> {
//                            MicrodataDocStatus status = entry.getKey();
//                            return PROBLEM_STATUSES.contains(status);
//                        }).collect(Collectors.summarizingLong(Map.Entry::getValue)).getSum();
//                ProblemSignal problem;
//                Map<MicrodataDocStatus, List<String>> samples;
//                // Integer form of "urlsWithProblems / urlsWithMicrodata > BAD_DOCS_PERCENT / 100",
//                // guarded against hosts with no counted documents.
//                if (urlsWithMicrodata > 0 && 100 * urlsWithProblems > urlsWithMicrodata * BAD_DOCS_PERCENT) {
//                    problem = new ProblemSignal(new SiteProblemContent.MicrodataErrors(), now);
//                    samples = info.getUrlExamples();
//                } else {
//                    // Below the threshold: signal the ABSENT state and pass an empty sample list
//                    // for each problem status.
//                    problem = new ProblemSignal(SiteProblemTypeEnum.MICRODATA_ERRORS, SiteProblemState.ABSENT, now);
//                    samples = PROBLEM_STATUSES.stream().collect(Collectors.toMap(Function.identity(), s -> Collections.emptyList()));
//                }
//                problemsBuffer.put(hostId, Pair.of(problem, samples));
//            }
//            // Write out the buffer once it is full. Only hosts returned by hostsCDao.getHosts(...)
//            // are written; buffered hosts it does not return are dropped by the clear() below.
//            if (problemsBuffer.size() >= BUFFER_SIZE) {
//                for (WebmasterHostGeneration host : hostsCDao.getHosts(problemsBuffer.keySet())) {
//                    Pair<ProblemSignal, Map<MicrodataDocStatus, List<String>>> problemPair = problemsBuffer.get(host.getHostId());
//                    microdataErrorSamplesCDao.insertSamples(host.getHostId(), problemPair.getRight());
//                    siteProblemsService.updateRealTimeProblem(host.getHostId(), problemPair.getLeft());
//                }
//                problemsBuffer.clear();
//            }
//        });
//        // Write out whatever is left after the last row.
//        // NOTE(review): this loop duplicates the one inside the callback - a shared private helper
//        // would keep the two in sync.
//        if (!problemsBuffer.isEmpty()) {
//            for (WebmasterHostGeneration host : hostsCDao.getHosts(problemsBuffer.keySet())) {
//                Pair<ProblemSignal, Map<MicrodataDocStatus, List<String>>> problemPair = problemsBuffer.get(host.getHostId());
//                microdataErrorSamplesCDao.insertSamples(host.getHostId(), problemPair.getRight());
//                siteProblemsService.updateRealTimeProblem(host.getHostId(), problemPair.getLeft());
//            }
//        }
//        return new Result(TaskResult.SUCCESS);
//    }
//
//    // NOTE(review): six-field cron expression; presumably second 0, minute 7 of every hour -
//    // confirm against the format TaskSchedule.startByCron expects.
//    @Override
//
//    public TaskSchedule getSchedule() {
//        return TaskSchedule.startByCron("0 7 * * * *");
//    }
//
//    @Override
//    public Class<TD> getDataClass() {
//        return TD.class;
//    }
//
//    /**
//     * Task data for this periodic task; it only reports the task type.
//     */
//    public static class TD extends PeriodicTaskData {
//        @Override
//        public WorkerTaskType getTaskType() {
//            return WorkerTaskType.UPDATE_MICRODATA_STATS;
//        }
//    }
//
//    @Required
//    public void setHostsCDao(HostsCDao hostsCDao) {
//        this.hostsCDao = hostsCDao;
//    }
//
//    @Required
//    public void setWebmasterYtService(WebmasterYtService webmasterYtService) {
//        this.webmasterYtService = webmasterYtService;
//    }
//
//    @Required
//    public void setYtPathPrefix(String ytPathPrefix) {
//        this.ytPathPrefix = ytPathPrefix;
//    }
//
//    @Required
//    public void setSiteProblemsService(SiteProblemsService siteProblemsService) {
//        this.siteProblemsService = siteProblemsService;
//    }
//
//    @Required
//    public void setMicrodataErrorSamplesCDao(MicrodataErrorSamplesCDao microdataErrorSamplesCDao) {
//        this.microdataErrorSamplesCDao = microdataErrorSamplesCDao;
//    }
//}
