package ru.yandex.webmaster3.streamer.robot;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.zip.InflaterInputStream;

import NKwYT.Queries;
import com.google.protobuf.InvalidProtocolBufferException;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.Value;
import lombok.extern.slf4j.Slf4j;

import ru.yandex.kikimr.persqueue.consumer.StreamListener;
import ru.yandex.webmaster3.core.logbroker.reader.IDataProcessing;
import ru.yandex.webmaster3.core.logbroker.reader.MessageContainer;
import ru.yandex.webmaster3.core.solomon.SolomonSensor;
import ru.yandex.webmaster3.core.solomon.metric.SolomonCounter;
import ru.yandex.webmaster3.core.solomon.metric.SolomonKey;
import ru.yandex.webmaster3.core.solomon.metric.SolomonMetricRegistry;
import ru.yandex.webmaster3.core.util.TimeUtils;
import ru.yandex.webmaster3.storage.searchurl.samples.dao.RealtimeUrlHttpCodesCHDao;
import ru.yandex.webmaster3.storage.searchurl.samples.dao.RealtimeUrlHttpCodesExamplesCHDao;

import static ru.yandex.webmaster3.storage.searchurl.samples.dao.RealtimeUrlHttpCodesCHDao.Record.HttpGroup.OK;

/**
 * @author kravchenko99
 * @date 4/12/21
 */

@Slf4j
@RequiredArgsConstructor
public class RthubDataProcessing implements IDataProcessing {

    // Batching parameters handed to the ClickhouseUploader constructor in init().
    // NOTE(review): their exact meaning (record counts? seconds?) is defined by ClickhouseUploader — confirm there.
    private static final int MAX_THRESHOLD = 200_000;
    private static final int MIN_THRESHOLD = 100_000;
    private static final int TIME_THRESHOLD = 60;

    private final SolomonMetricRegistry solomonMetricRegistry;
    private final RealtimeUrlHttpCodesCHDao realtimeUrlHttpCodesCHDao;
    private final RealtimeUrlHttpCodesExamplesCHDao realtimeUrlHttpCodesExamplesCHDao;
    // Not final: both fields are assigned in init(), so init() has to run before process() is called.
    private DataProcessingMetrics dataProcessingMetrics;
    private ClickhouseUploader<RealtimeUrlHttpCodesCHDao.Record, RealtimeUrlHttpCodesExamplesCHDao.Record> clickhouseUploader;

    // HTTP codes whose records are skipped by processBatch() (neither counted nor stored as examples).
    private static final Set<Long> CODES_FOR_SKIP = Set.of(429L);

    /**
     * Creates the Solomon counters and the uploader that accumulates parsed batches.
     * The uploader is given {@link #saveBatch(List)} as its flush callback together with the
     * threshold constants of this class.
     */
    public void init() {
        dataProcessingMetrics = new DataProcessingMetrics(solomonMetricRegistry);

        final Consumer<List<ClickhouseUploader.UploadRecord<RealtimeUrlHttpCodesCHDao.Record,
                RealtimeUrlHttpCodesExamplesCHDao.Record>>> batchSaver = this::saveBatch;

        clickhouseUploader = new ClickhouseUploader<>(
                getClass().getSimpleName(),
                batchSaver,
                MAX_THRESHOLD,
                MIN_THRESHOLD,
                TIME_THRESHOLD
        );
    }

    /**
     * Flush callback of the uploader: writes the examples and then the aggregated records of all
     * queued upload records through the DAOs, bumps the "saved" counter and finally commits every
     * upload record of the batch.
     *
     * @param batch upload records handed over by the uploader
     */
    private void saveBatch(List<ClickhouseUploader.UploadRecord<RealtimeUrlHttpCodesCHDao.Record,
            RealtimeUrlHttpCodesExamplesCHDao.Record>> batch) {
        final List<RealtimeUrlHttpCodesCHDao.Record> allRecords = new ArrayList<>();
        final List<RealtimeUrlHttpCodesExamplesCHDao.Record> allExamples = new ArrayList<>();
        for (var uploadRecord : batch) {
            allRecords.addAll(uploadRecord.getRecords());
            allExamples.addAll(uploadRecord.getExamples());
        }

        // Examples are written first, the aggregated counters second.
        realtimeUrlHttpCodesExamplesCHDao.addRecord(allExamples);

        log.info("batchSize - {}", allRecords.size());
        realtimeUrlHttpCodesCHDao.addRecord(allRecords);
        dataProcessingMetrics.totalSaved.add(allRecords.size());

        // Commit only after both DAO writes returned without throwing.
        for (var uploadRecord : batch) {
            uploadRecord.getCommitter().commit();
        }
    }

    /**
     * Parses one message container and, when parsing produced an upload record, hands it to the
     * ClickHouse uploader. Nothing is queued when {@code processBatch} returns {@code null}.
     *
     * @param messageContainer container with the raw messages to process
     */
    @SneakyThrows
    @Override
    public void process(MessageContainer messageContainer) {
        final ClickhouseUploader.UploadRecord<RealtimeUrlHttpCodesCHDao.Record,
                RealtimeUrlHttpCodesExamplesCHDao.Record> uploadRecord = processBatch(messageContainer);

        if (uploadRecord != null) {
            clickhouseUploader.putRecord(uploadRecord);
        }
    }

    /**
     * Inflates and parses every raw message of the container and converts the batch into a single
     * upload record.
     *
     * <p>Per message: the bytes are decompressed with {@link InflaterInputStream}, parsed as
     * {@code TWebmasterAlertItem} and turned into a counter row. A message that cannot be
     * decompressed or parsed is counted as "unparsed_record" and skipped; a message whose row cannot
     * be built is counted as "unparsed_url" and skipped. Messages with {@code lastAccess == 0} or
     * with an HTTP code from {@code CODES_FOR_SKIP} are skipped silently. Rows whose HTTP group is
     * not {@code OK} additionally produce an example row.
     *
     * <p>Equal counter rows are then collapsed into one row carrying the number of occurrences.
     *
     * @param messageContainer container providing the compressed raw messages and the read responder
     * @return upload record with aggregated rows, examples and the commit callback of the container,
     *         or {@code null} when an unexpected exception interrupted processing of the batch
     */
    private ClickhouseUploader.UploadRecord<RealtimeUrlHttpCodesCHDao.Record,
            RealtimeUrlHttpCodesExamplesCHDao.Record> processBatch(MessageContainer messageContainer) {

        List<RealtimeUrlHttpCodesCHDao.Record> records = new ArrayList<>();
        List<RealtimeUrlHttpCodesExamplesCHDao.Record> examples = new ArrayList<>();
        try {
            dataProcessingMetrics.totalRead.add(messageContainer.getRawMessages().size());
            for (byte[] compressedRawMessage : messageContainer.getRawMessages()) {
                try (InputStream inputStream =
                             new InflaterInputStream(new ByteArrayInputStream(compressedRawMessage))) {
                    byte[] rawMessage = inputStream.readAllBytes();

                    final Queries.TWebmasterAlertItem record = Queries.TWebmasterAlertItem.parseFrom(rawMessage);
                    final RealtimeUrlHttpCodesCHDao.Record row;
                    try {
                        row = RealtimeUrlHttpCodesCHDao.Record.of(record.getUrl(),
                                TimeUtils.toHourInterval(record.getLastAccess() * 1000),
                                record.getHttpCode(), 1);
                    } catch (Exception e) {
                        // Any failure while building the row is accounted as an unparsable URL.
                        dataProcessingMetrics.unparsedUrl.add(1);
                        continue;
                    }

                    if (record.getLastAccess() == 0) {
                        continue;
                    }
                    if (CODES_FOR_SKIP.contains(record.getHttpCode())) {
                        continue;
                    }

                    records.add(row);

                    if (row.getHttpGroup() != OK) {
                        final RealtimeUrlHttpCodesExamplesCHDao.Record rowExample =
                                RealtimeUrlHttpCodesExamplesCHDao.Record.of(
                                        record.getUrl(),
                                        TimeUtils.toHourInterval(record.getLastAccess() * 1000),
                                        record.getHttpCode(),
                                        record.getLastAccess() * 1000
                                );
                        examples.add(rowExample);
                    }

                } catch (InvalidProtocolBufferException e) {
                    log.error("Error in parsing robot data.", e);
                    dataProcessingMetrics.unparsedRecord.add(1);
                } catch (IOException e) {
                    // A corrupt or truncated compressed payload must only cost this one message:
                    // without this handler the exception reached the outer catch and the whole
                    // batch, including the rows parsed so far, was dropped.
                    log.error("Error in decompressing robot data.", e);
                    dataProcessingMetrics.unparsedRecord.add(1);
                }
            }

            // Collapse identical rows into one row whose count is the number of occurrences.
            List<RealtimeUrlHttpCodesCHDao.Record> processedRecords = records.stream()
                    .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()))
                    .entrySet()
                    .stream()
                    .map(x -> new RealtimeUrlHttpCodesCHDao.Record(x.getKey().getHostId(),
                            x.getKey().getHourInterval(), x.getKey().getHttpGroup(), x.getValue()))
                    .collect(Collectors.toList());
            var readResponder = messageContainer.getReadResponder();
            return new ClickhouseUploader.UploadRecord<>(readResponder::commit, processedRecords,
                    examples);
        } catch (Exception exp) {
            log.error("Error was occurred {}", exp.getMessage(), exp);
            dataProcessingMetrics.failed.add(messageContainer.getRawMessages().size());
        }
        return null;
    }


    /**
     * Holder of the Solomon counters used while processing RTHub messages. Every counter is
     * registered under the category label {@code rthub_webmaster_urlalert} and differs only in the
     * value of the {@code operation_type} label.
     */
    @Value
    public static class DataProcessingMetrics {
        private static final String MAIN_LABEL = "rthub_webmaster_urlalert";
        private static final String OPERATION_LABEL = "operation_type";
        private static final String TOTAL_READ = "read";
        private static final String TOTAL_SAVED = "saved";
        private static final String STATISTIC_SAVED = "statistic_saved";
        private static final String UNPARSED_RECORD = "unparsed_record";
        private static final String UNPARSED_URL = "unparsed_url";
        private static final String OTHER_EXCEPTION = "problem_with_processing";
        private static final String HOST_NOT_IN_WEBMASTER = "host_not_in_webmaster";
        private static final String SIZE_LIMIT = "filtered_by_size_limit";

        SolomonCounter totalRead;
        SolomonCounter totalSaved;
        SolomonCounter unparsedRecord;
        SolomonCounter failed;
        SolomonCounter filteredBySizeLimit;
        SolomonCounter hostNotInWebmaster;
        SolomonCounter unparsedUrl;

        /**
         * Registers all counters in the given registry.
         *
         * @param solomonMetricRegistry registry the counters are created in
         */
        public DataProcessingMetrics(SolomonMetricRegistry solomonMetricRegistry) {
            final SolomonKey categoryKey = SolomonKey.create(SolomonSensor.LABEL_CATEGORY, MAIN_LABEL);

            this.totalRead = operationCounter(solomonMetricRegistry, categoryKey, TOTAL_READ);
            this.totalSaved = operationCounter(solomonMetricRegistry, categoryKey, TOTAL_SAVED);
            this.unparsedRecord = operationCounter(solomonMetricRegistry, categoryKey, UNPARSED_RECORD);
            this.failed = operationCounter(solomonMetricRegistry, categoryKey, OTHER_EXCEPTION);
            this.filteredBySizeLimit = operationCounter(solomonMetricRegistry, categoryKey, SIZE_LIMIT);
            this.hostNotInWebmaster = operationCounter(solomonMetricRegistry, categoryKey, HOST_NOT_IN_WEBMASTER);
            this.unparsedUrl = operationCounter(solomonMetricRegistry, categoryKey, UNPARSED_URL);
        }

        /** Creates a simple counter keyed by the category key plus the given operation label value. */
        private static SolomonCounter operationCounter(SolomonMetricRegistry registry, SolomonKey categoryKey,
                                                       String operation) {
            return registry.createSimpleCounter(categoryKey.withLabel(OPERATION_LABEL, operation), 1.0);
        }
    }
}
