package ru.yandex.webmaster3.streamer.robot;

import NMercury.PushedUrlsLog;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.protobuf.InvalidProtocolBufferException;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.Value;
import lombok.extern.slf4j.Slf4j;
import org.joda.time.LocalDate;
import org.joda.time.LocalDateTime;
import org.springframework.beans.factory.annotation.Autowired;
import ru.yandex.kikimr.persqueue.consumer.StreamListener;
import ru.yandex.webmaster3.core.data.WebmasterHostId;
import ru.yandex.webmaster3.core.logbroker.reader.IDataProcessing;
import ru.yandex.webmaster3.core.logbroker.reader.MessageContainer;
import ru.yandex.webmaster3.core.solomon.SolomonSensor;
import ru.yandex.webmaster3.core.solomon.metric.SolomonCounter;
import ru.yandex.webmaster3.core.solomon.metric.SolomonKey;
import ru.yandex.webmaster3.core.solomon.metric.SolomonMetricRegistry;
import ru.yandex.webmaster3.core.util.IdUtils;
import ru.yandex.webmaster3.core.util.TimeUtils;
import ru.yandex.webmaster3.storage.host.AllHostsCacheService;
import ru.yandex.webmaster3.storage.searchbase.SearchBaseUpdatesService;
import ru.yandex.webmaster3.storage.searchbase.dao.MonsterHostsYDao;
import ru.yandex.webmaster3.storage.searchurl.samples.dao.SearchUrlFreshStatisticsCHDao;
import ru.yandex.webmaster3.storage.searchurl.samples.dao.SearchUrlFreshUrlSampleCHDao;
import ru.yandex.webmaster3.storage.util.clickhouse2.condition.Condition;
import ru.yandex.wmtools.common.util.uri.URI2;
import ru.yandex.wmtools.common.util.uri.UriUtils;

import javax.annotation.PostConstruct;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import static java.util.stream.Collectors.counting;

/**
 * Logbroker data processor for Mercury pushed-URL log records.
 *
 * <p>Every incoming {@link MessageContainer} is parsed into
 * {@link SearchUrlFreshUrlSampleCHDao.Row} samples, filtered, and handed to a
 * {@link ClickhouseUploader}, which calls back into {@link #uploadBatch(List)} to persist
 * the samples and per-host statistics and then commit the source messages.
 *
 * <p>Created by ishalaru, 04.03.2021.
 */
@Slf4j
@RequiredArgsConstructor(onConstructor_ = @Autowired)
public class MercuryDataProcessing implements IDataProcessing {
    /** A host whose stored sample count exceeds this value gets no new samples. */
    private static final int MAX_EXAMPLES = 100_000;
    /** Upper threshold passed to {@link ClickhouseUploader}; presumably a batch-size limit (confirm there). */
    private static final int MAX_THRESHOLD = 100_000;

    // It seems this should be smaller than maxUnconsumedReads in the reader settings,
    // otherwise, in theory, the number of unacknowledged messages may exceed maxUnconsumedReads.
    private static final int MIN_THRESHOLD = 5_000;
    /** Time threshold passed to {@link ClickhouseUploader}; units are defined by the uploader (not visible here). */
    private static final int TIME_THRESHOLD = 30;

    // NOTE: the order of the uninitialized final fields defines the Lombok-generated constructor signature.
    private final AllHostsCacheService allHostsCacheService;
    private final SearchUrlFreshUrlSampleCHDao searchUrlFreshUrlSampleCHDao;
    private final SearchUrlFreshStatisticsCHDao searchUrlFreshStatisticsCHDao;
    private final SearchBaseUpdatesService searchBaseUpdatesService;
    private final SolomonMetricRegistry solomonMetricRegistry;
    private final MonsterHostsYDao monsterHostsYDao;
    /** Created in {@link #init()} once the metric registry has been injected. */
    private DataProcessingMetrics dataProcessingMetrics;
    /** Hosts loaded once in {@link #init()}; records for these hosts are skipped. */
    private final Set<WebmasterHostId> monsterCache = new HashSet<>();
    /**
     * Per-host count of already stored samples, loaded from ClickHouse on a cache miss.
     *
     * <p>Entries expire a fixed time after they were <em>loaded</em>. With access-based expiration
     * a host that keeps receiving records would never have its count reloaded, so the
     * {@link #MAX_EXAMPLES} limit could never kick in for (or be lifted from) an active host.
     */
    private final LoadingCache<WebmasterHostId, Long> hostCountLimiter = CacheBuilder.newBuilder()
            .maximumSize(300_000)
            .expireAfterWrite(30, TimeUnit.MINUTES)
            .build(new CacheLoader<>() {
                @Override
                public Long load(WebmasterHostId key) {
                    return searchUrlFreshUrlSampleCHDao.getSamplesCount(key, Condition.trueCondition());
                }
            });

    private final ClickhouseUploader<SearchUrlFreshUrlSampleCHDao.Row, Object> clickhouseUploader = new ClickhouseUploader<>(
            getClass().getSimpleName(), this::uploadBatch, MAX_THRESHOLD, MIN_THRESHOLD, TIME_THRESHOLD);

    /**
     * Creates the metric counters and loads the monster-host set.
     */
    @PostConstruct
    public void init() {
        dataProcessingMetrics = new DataProcessingMetrics(solomonMetricRegistry);
        monsterCache.addAll(monsterHostsYDao.selectAll());
    }

    /**
     * Processes one container of raw messages.
     *
     * <p>If the container yields no rows to store (everything was filtered out, or processing
     * failed), the container is committed immediately. Otherwise the rows are queued in the
     * uploader and the commit happens after a successful upload.
     *
     * @param messageContainer raw messages together with their commit handle
     */
    @Override
    @SneakyThrows
    public void process(MessageContainer messageContainer) {
        var uploadRecord = processBatch(messageContainer);
        if (uploadRecord == null) {
            // Nothing to upload: acknowledge right away so the messages are not re-read.
            messageContainer.commit();
            return;
        }

        clickhouseUploader.putRecord(uploadRecord);
    }

    /**
     * Parses and filters all raw messages of the container.
     *
     * <p>A record is dropped when its URL cannot be parsed, its host is a monster host, or its host
     * already has more than {@link #MAX_EXAMPLES} samples. Remaining records are kept only if they
     * have a raw UTF-8 title, were added after the current search base collection date, and their
     * host is present in the all-hosts cache. Each of those three conditions is counted
     * independently in the metrics.
     *
     * @param messageContainer container whose raw messages are processed
     * @return an upload record with the accepted rows, or {@code null} if no row was accepted
     *         or an unexpected exception occurred
     */
    private ClickhouseUploader.UploadRecord<SearchUrlFreshUrlSampleCHDao.Row, Object> processBatch(MessageContainer messageContainer) {
        List<SearchUrlFreshUrlSampleCHDao.Row> sampleRows = new ArrayList<>();
        var baseCollectionDate = searchBaseUpdatesService.getSearchBaseUpdates().getCurrentBase().getBaseCollectionDate();
        int invalidTime = 0;
        int missingHostCache = 0;
        int invalidTitle = 0;

        try {
            dataProcessingMetrics.totalRead.add(messageContainer.getRawMessages().size());
            // Loop-invariant: the base collection instant is the same for every record of the batch.
            final long baseCollectionMillis = baseCollectionDate.toDateTime().getMillis();
            for (byte[] rawMessage : messageContainer.getRawMessages()) {
                try {
                    var urlLogRecord = PushedUrlsLog.TPushedUrlLogRecord.parseFrom(rawMessage);
                    var sampleRow = processRecord(urlLogRecord);
                    if (sampleRow == null) {
                        dataProcessingMetrics.unparsedRecord.add(1);
                        continue;
                    }

                    if (monsterCache.contains(sampleRow.getHostId())) {
                        dataProcessingMetrics.skipByMonsterHost.add(1);
                        continue;
                    }

                    if (hostCountLimiter.get(sampleRow.getHostId()) > MAX_EXAMPLES) {
                        dataProcessingMetrics.filteredBySizeLimit.add(1);
                        continue;
                    }

                    boolean hasTitleRawUTF8 = urlLogRecord.hasTitleRawUTF8();
                    if (!hasTitleRawUTF8) {
                        invalidTitle++;
                    }
                    // SearchAddTime is divided by 1000 to compare it with a millisecond instant.
                    boolean isAfterCurJupiterBase = urlLogRecord.getSearchAddTime() / 1000L > baseCollectionMillis;
                    if (!isAfterCurJupiterBase) {
                        invalidTime++;
                    }
                    boolean hostInCache = allHostsCacheService.contains(sampleRow.getHostId());
                    if (!hostInCache) {
                        missingHostCache++;
                    }
                    if (hasTitleRawUTF8 && isAfterCurJupiterBase && hostInCache) {
                        sampleRows.add(sampleRow);
                    }
                } catch (InvalidProtocolBufferException e) {
                    // A single malformed message must not fail the whole batch.
                    log.info("Error in parsing robot data.", e);
                }
            }

            var readResponder = messageContainer.getReadResponder();
            return sampleRows.isEmpty() ? null : new ClickhouseUploader.UploadRecord<>(readResponder::commit, sampleRows, List.of());
        } catch (Exception exp) {
            // NOTE(review): the caller commits the container when null is returned, so a batch
            // that fails here is dropped rather than retried. Confirm this best-effort is intended.
            log.error(exp.getMessage(), exp);
            dataProcessingMetrics.processingFailed.add(messageContainer.getRawMessages().size());
        } finally {
            dataProcessingMetrics.skipByMissingCache.add(missingHostCache);
            dataProcessingMetrics.skipByInvalidSamovarTime.add(invalidTime);
            dataProcessingMetrics.skipByTitleIsNull.add(invalidTitle);
        }

        return null;
    }

    /**
     * Converts one pushed-URL log record into a ClickHouse sample row.
     *
     * @param record parsed protobuf record
     * @return the built row, or {@code null} if the record's URL could not be parsed
     */
    private SearchUrlFreshUrlSampleCHDao.Row processRecord(PushedUrlsLog.TPushedUrlLogRecord record) {
        SearchUrlFreshUrlSampleCHDao.Row.RowBuilder builder = SearchUrlFreshUrlSampleCHDao.Row.builder();

        try {
            final URI2 hostUri = UriUtils.toUri(record.getUrl(), UriUtils.UriFeature.DEFAULT_SCHEME_HTTP,
                    UriUtils.UriFeature.USE_PUNYCODED_HOSTNAME);
            builder.hostId(IdUtils.fromUri2(hostUri));
            // The stored path includes the query string only when the URL actually has query parameters.
            if (hostUri.getQueryParams().size() == 0) {
                builder.path(hostUri.getPath());
            } else {
                builder.path(hostUri.getPath() + "?" + hostUri.getQuery());
            }
        } catch (Exception e) {
            log.error("exception in parsing url {}", record.getUrl(), e);
            return null;
        }

        // NOTE(review): the meaning of "2 - action number" depends on the proto enum and the
        // stored action type, neither of which is visible here.
        builder.actionType(2 - record.getAction().getNumber());
        // SearchAddTime is, surprisingly, in nanoseconds, while everything else is in seconds.
        // NOTE(review): dividing by 1000 to get a millisecond instant implies microseconds - confirm the unit.
        LocalDateTime addTime = new LocalDateTime(record.getSearchAddTime() / 1000L, TimeUtils.EUROPE_MOSCOW_ZONE);
        builder.addTime(addTime);
        builder.title(record.getTitleRawUTF8());
        builder.lastAccess(new LocalDateTime(record.getLastAccess() * 1000L, TimeUtils.EUROPE_MOSCOW_ZONE));
        builder.relCanonicalTarget(record.getRelCanonicalTarget());
        builder.httpCode(record.getHttpCode());
        // The insert time is set to the same value as the add time.
        builder.insetTime(addTime);
        builder.validFromMetrikaLastAccess(record.getValidFromMetrikaLastAccess());
        builder.validFromIndexNowLastAccess(record.getValidFromIndexNowLastAccess());

        return builder.build();
    }

    /**
     * Uploader callback: stores the rows of all queued records, writes per-host statistics for
     * the current date, and commits every record of the batch.
     *
     * <p>On any exception nothing is committed; the failure is logged and counted.
     *
     * @param batch upload records accumulated by the uploader
     */
    private void uploadBatch(List<ClickhouseUploader.UploadRecord<SearchUrlFreshUrlSampleCHDao.Row, Object>> batch) {
        try {
            var rows = batch.stream().flatMap(x -> x.getRecords().stream()).collect(Collectors.toList());
            searchUrlFreshUrlSampleCHDao.addRecords(rows);
            dataProcessingMetrics.totalSaved.add(rows.size());

            Map<WebmasterHostId, Long> rowsPerHost = rows.stream().collect(
                    Collectors.groupingBy(SearchUrlFreshUrlSampleCHDao.Row::getHostId, counting()));
            searchUrlFreshStatisticsCHDao.addRecord(LocalDate.now(), rowsPerHost);
            dataProcessingMetrics.statisticsSaved.add(rowsPerHost.size());

            // Acknowledge the source messages only after both writes succeeded.
            batch.forEach(x -> x.getCommitter().commit());
        } catch (Exception e) {
            log.error("Error saving to CH", e);
            dataProcessingMetrics.saveFailed.add(batch.size());
        }
    }

    /**
     * Solomon counters of the processor; every counter shares the {@code fresh} category and
     * differs by the {@code operation_type} label.
     */
    @Value
    public static class DataProcessingMetrics {
        private static final String MAIN_LABEL = "fresh";
        private static final String OPERATION_LABEL = "operation_type";
        private static final String TOTAL_READ = "read";
        private static final String TOTAL_SAVED = "saved";
        private static final String STATISTIC_SAVED = "statistic_saved";
        private static final String UNPARSED_RECORD = "unparsed";
        private static final String FILTERED_BY_MONSTER_HOST = "filtered_by_monsters_host";
        private static final String FILTERED_BY_MISSING_CACHE = "filtered_missing_host_cache";
        private static final String FILTERED_BY_TOO_OLD_ADD_TIME = "filtered_too_old_samovar_add_time";
        private static final String FILTERED_BY_TITLE_IS_NULL = "filtered_title_is_null";
        private static final String FILTERED_BY_SIZE_LIMIT = "filtered_by_size_limit";
        private static final String PROCESSING_EXCEPTION = "problem_with_processing";
        private static final String SAVING_EXCEPTION = "problem_with_saving";

        SolomonCounter totalRead;
        SolomonCounter totalSaved;
        SolomonCounter statisticsSaved;
        SolomonCounter unparsedRecord;
        SolomonCounter skipByMissingCache;
        SolomonCounter skipByInvalidSamovarTime;
        SolomonCounter skipByTitleIsNull;
        SolomonCounter skipByMonsterHost;
        SolomonCounter processingFailed;
        SolomonCounter saveFailed;
        SolomonCounter filteredBySizeLimit;

        /**
         * Registers all counters in the given registry.
         *
         * @param solomonMetricRegistry registry in which the counters are created
         */
        public DataProcessingMetrics(SolomonMetricRegistry solomonMetricRegistry) {
            final SolomonKey baseKey = SolomonKey.create(SolomonSensor.LABEL_CATEGORY, MAIN_LABEL);
            totalRead = counter(solomonMetricRegistry, baseKey, TOTAL_READ);
            totalSaved = counter(solomonMetricRegistry, baseKey, TOTAL_SAVED);
            statisticsSaved = counter(solomonMetricRegistry, baseKey, STATISTIC_SAVED);
            unparsedRecord = counter(solomonMetricRegistry, baseKey, UNPARSED_RECORD);
            skipByMonsterHost = counter(solomonMetricRegistry, baseKey, FILTERED_BY_MONSTER_HOST);
            skipByMissingCache = counter(solomonMetricRegistry, baseKey, FILTERED_BY_MISSING_CACHE);
            skipByInvalidSamovarTime = counter(solomonMetricRegistry, baseKey, FILTERED_BY_TOO_OLD_ADD_TIME);
            skipByTitleIsNull = counter(solomonMetricRegistry, baseKey, FILTERED_BY_TITLE_IS_NULL);
            processingFailed = counter(solomonMetricRegistry, baseKey, PROCESSING_EXCEPTION);
            saveFailed = counter(solomonMetricRegistry, baseKey, SAVING_EXCEPTION);
            filteredBySizeLimit = counter(solomonMetricRegistry, baseKey, FILTERED_BY_SIZE_LIMIT);
        }

        /** Creates a simple counter keyed by the base key plus the given operation label value. */
        private static SolomonCounter counter(SolomonMetricRegistry registry, SolomonKey baseKey, String operation) {
            return registry.createSimpleCounter(baseKey.withLabel(OPERATION_LABEL, operation), 1.0);
        }
    }
}
