package ru.yandex.webmaster3.worker.turbo.autoparser;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.Range;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.joda.time.LocalDate;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;

import ru.yandex.webmaster3.core.WebmasterException;
import ru.yandex.webmaster3.core.http.WebmasterErrorResponse;
import ru.yandex.webmaster3.core.util.environment.YandexEnvironmentProvider;
import ru.yandex.webmaster3.core.util.environment.YandexEnvironmentType;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskState;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.core.worker.task.TaskResult;
import ru.yandex.webmaster3.storage.host.CommonDataState;
import ru.yandex.webmaster3.storage.settings.SettingsService;
import ru.yandex.webmaster3.storage.turbo.dao.autoparser.TurboAutoparsedHostsSpeedsYDao;
import ru.yandex.webmaster3.storage.turbo.dao.autoparser.TurboAutoparsedHostsSpeedsYDao.OwnerSpeedInfo;
import ru.yandex.webmaster3.storage.util.yt.AsyncTableReader;
import ru.yandex.webmaster3.storage.util.yt.YtCypressService;
import ru.yandex.webmaster3.storage.util.yt.YtNode;
import ru.yandex.webmaster3.storage.util.yt.YtPath;
import ru.yandex.webmaster3.storage.util.yt.YtService;
import ru.yandex.webmaster3.storage.util.yt.YtTableReadDriver;
import ru.yandex.webmaster3.storage.yql.YqlFunctions;
import ru.yandex.webmaster3.storage.yql.YqlQueryBuilder;
import ru.yandex.webmaster3.storage.yql.YqlService;
import ru.yandex.webmaster3.worker.PeriodicTask;
import ru.yandex.webmaster3.worker.TaskSchedule;

import static ru.yandex.webmaster3.storage.host.CommonDataType.TURBO_AUTOPARSED_HOSTS_SPEEDS_LAST_UPDATE;
import static ru.yandex.webmaster3.storage.yql.YqlProtoDescriptors.HOSTSPEED_HIST_MESSAGE;
import static ru.yandex.webmaster3.storage.yql.YqlProtoDescriptors.HOSTSPEED_PROTO_DESCRIPTOR;

/**
 * Periodic task that imports speed statistics of Turbo autoparsed hosts.
 * <p>
 * One run performs the following steps:
 * <ol>
 *     <li>converts daily source tables named {@code yyyy-MM-dd.host} into per-day tables
 *     inside {@link #convertedTablesDir} (production only);</li>
 *     <li>if the newest source date is later than the date stored in the
 *     {@code TURBO_AUTOPARSED_HOSTS_SPEEDS_LAST_UPDATE} setting, aggregates the converted tables
 *     by owner into {@link #aggregatedTablePath} (production only), loads the aggregated rows
 *     into {@link TurboAutoparsedHostsSpeedsYDao} and stores the new date in the setting;</li>
 *     <li>removes converted tables beyond the first {@link #OLD_TABLES_COUNT} in reverse order
 *     (production only).</li>
 * </ol>
 * <p>
 * Created by Oleg Bazdyrev on 2019-07-17.
 */
@Slf4j
@Component
@RequiredArgsConstructor(onConstructor_ = @Autowired)
public class ImportAutoparsedHostsSpeedsTask extends PeriodicTask<PeriodicTaskState> {

    /** Name pattern of a daily source table; group 1 is the {@code yyyy-MM-dd} date part. */
    private static final Pattern SOURCE_TABLE_PATTERN = Pattern.compile("(\\d{4}-\\d{2}-\\d{2})\\.host");
    /** Date format used both for converted table names and for the stored "last update" value. */
    private static final DateTimeFormatter DAY_TABLE_FORMAT = DateTimeFormat.forPattern("yyyy-MM-dd");
    /** How many converted tables are kept, and how far back (in days) source tables are still converted. */
    private static final int OLD_TABLES_COUNT = 40;
    /** Number of days subtracted from the newest date to get the lower bound of the aggregation range. */
    private static final int AGGREGATE_TABLES_COUNT = 28;
    /** Minimal number of visits required for an average to be reported. */
    private static final int MIN_VISIT_COUNT = 10;
    /** Number of rows accumulated before they are flushed to the DAO. */
    private static final int IMPORT_BATCH_SIZE = 500;

    private final SettingsService settingsService;
    private final TurboAutoparsedHostsSpeedsYDao turboAutoparsedHostsSpeedsYDao;
    private final YtService ytService;
    private final YqlService yqlService;

    @Value("${webmaster3.worker.turbo.autoparser.hostspeed.sourceDir}")
    private final YtPath sourceDir;
    @Value("${webmaster3.worker.turbo.autoparser.hostspeed.workDir}/converted")
    private final YtPath convertedTablesDir;
    @Value("${webmaster3.worker.turbo.autoparser.hostspeed.workDir}/aggregated")
    private final YtPath aggregatedTablePath;

    /**
     * Converts new source tables, re-imports aggregated data when a newer day is available
     * and cleans up old converted tables.
     *
     * @param runId identifier of this run (not used by the task itself)
     * @return a result with {@link TaskResult#SUCCESS}
     * @throws Exception whatever the underlying YT/YQL calls throw
     */
    @Override
    public Result run(UUID runId) throws Exception {
        ytService.withoutTransaction(cypressService -> {
            // convert raw data
            LocalDate date = convertTables(cypressService);
            if (date == null) {
                // No table in sourceDir matches SOURCE_TABLE_PATTERN: there is nothing to aggregate or import.
                // Without this guard the code below would fail with a NullPointerException.
                log.info("No source tables found in {}, skipping aggregation and import", sourceDir);
            } else {
                // aggregate data
                CommonDataState cds = settingsService.getSettingUncached(TURBO_AUTOPARSED_HOSTS_SPEEDS_LAST_UPDATE);
                boolean hasNewerData = cds == null || date.isAfter(DAY_TABLE_FORMAT.parseLocalDate(cds.getValue()));
                if (hasNewerData) {
                    aggregateData(cypressService, date);
                    importData(cypressService, date);
                    settingsService.update(TURBO_AUTOPARSED_HOSTS_SPEEDS_LAST_UPDATE, DAY_TABLE_FORMAT.print(date));
                }
            }
            cleanOldTables(cypressService);
            return true;
        });
        return new Result(TaskResult.SUCCESS);
    }

    /**
     * Rebuilds the aggregated table from the converted per-day tables via a YQL query.
     * Rows are grouped by owner and only owners with at least {@link #MIN_VISIT_COUNT}
     * non-turbo visits are kept. Does nothing outside of production.
     *
     * @param cypressService not used here; kept for signature symmetry with the other steps
     * @param date           upper bound (newest day) of the aggregated range
     */
    private void aggregateData(YtCypressService cypressService, LocalDate date) {
        // aggregate only in production to avoid conflicts with transactions
        if (YandexEnvironmentProvider.getEnvironmentType() != YandexEnvironmentType.PRODUCTION) {
            log.info("Skipping aggregateData");
            return;
        }
        // collect tables for aggregate
        // NOTE(review): the bounds are date - AGGREGATE_TABLES_COUNT .. date; if RANGE is inclusive
        // on both ends this covers AGGREGATE_TABLES_COUNT + 1 tables - confirm this is intended.
        LocalDate minDate = date.minusDays(AGGREGATE_TABLES_COUNT);
        YqlQueryBuilder qb = new YqlQueryBuilder();
        qb.cluster(aggregatedTablePath);
        qb.appendText("INSERT INTO").appendTable(aggregatedTablePath).appendText("WITH TRUNCATE\n");
        qb.appendText("SELECT Owner, NonTurboTotalTime, NonTurboTotalCount, TurboTotalTime, TurboTotalCount FROM (");
        qb.appendText("SELECT Owner,");
        qb.appendText("SUM_IF(TotalTime, NOT Turbo) as NonTurboTotalTime,");
        qb.appendText("SUM_IF(TotalCount, NOT Turbo) as NonTurboTotalCount,");
        qb.appendText("SUM_IF(TotalTime, Turbo) as TurboTotalTime,");
        qb.appendText("SUM_IF(TotalCount, Turbo) as TurboTotalCount");
        qb.appendText("FROM RANGE(").appendTable(convertedTablesDir);
        qb.appendText(",'" + DAY_TABLE_FORMAT.print(minDate) + "','" + DAY_TABLE_FORMAT.print(date) + "')");
        qb.appendText("GROUP BY Url::GetOwner(Domain) as Owner \n");
        qb.appendText(") WHERE NonTurboTotalCount >=" + MIN_VISIT_COUNT + ";");

        yqlService.execute(qb.build());
    }

    /**
     * Reads the aggregated table and stores its rows through the DAO in batches of
     * {@link #IMPORT_BATCH_SIZE}.
     *
     * @param cypressService YT service used to read the aggregated table
     * @param date           day the imported data is attributed to
     * @throws WebmasterException if reading the table or inserting the rows fails
     */
    private void importData(YtCypressService cypressService, LocalDate date) {
        AsyncTableReader<DomainSpeedInfoRow> tableReader = new AsyncTableReader<>(cypressService, aggregatedTablePath,
                Range.all(), YtTableReadDriver.createYSONDriver(DomainSpeedInfoRow.class))
                .withThreadName("domain-speeds-reader");
        try (AsyncTableReader.TableIterator<DomainSpeedInfoRow> iterator = tableReader.read()) {
            List<OwnerSpeedInfo> batch = new ArrayList<>(IMPORT_BATCH_SIZE);
            while (iterator.hasNext()) {
                batch.add(iterator.next().toOwnerSpeedInfo(date));
                if (batch.size() >= IMPORT_BATCH_SIZE) {
                    turboAutoparsedHostsSpeedsYDao.insert(batch);
                    batch.clear();
                }
            }
            // flush the tail; avoid calling the DAO with an empty batch
            if (!batch.isEmpty()) {
                turboAutoparsedHostsSpeedsYDao.insert(batch);
            }
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // keep the interrupt flag visible to the caller after wrapping the exception
                Thread.currentThread().interrupt();
            }
            throw new WebmasterException("YT error", new WebmasterErrorResponse.YTServiceErrorResponse(getClass(), e), e);
        }
    }

    /**
     * Converts source tables that do not yet have a converted counterpart.
     * <p>
     * Source tables older than {@link #OLD_TABLES_COUNT} days before the newest converted table
     * are ignored. The conversion query itself is executed only in production.
     *
     * @param cypressService YT service used to list and create nodes
     * @return the newest date among all source tables, or {@code null} if no source table
     *         matches {@link #SOURCE_TABLE_PATTERN}
     * @throws InterruptedException propagated from the YT calls
     */
    private LocalDate convertTables(YtCypressService cypressService) throws InterruptedException {
        Set<String> sourceTables = cypressService.list(sourceDir).stream().map(YtPath::getName)
                .map(SOURCE_TABLE_PATTERN::matcher).filter(Matcher::matches).map(m -> m.group(1))
                .collect(Collectors.toSet());
        cypressService.create(convertedTablesDir, YtNode.NodeType.MAP_NODE, true, null, true);
        Set<String> convertedTables = cypressService.list(convertedTablesDir).stream()
                .map(YtPath::getName).collect(Collectors.toSet());

        // the newest source date is computed before already converted tables are filtered out
        LocalDate result = sourceTables.stream().map(DAY_TABLE_FORMAT::parseLocalDate)
                .max(Comparator.naturalOrder()).orElse(null);
        sourceTables.removeAll(convertedTables);
        // only fresh tables are of interest
        if (!convertedTables.isEmpty()) {
            // yyyy-MM-dd names compare lexicographically in the same order as the dates they denote
            LocalDate lastConvertedDate = convertedTables.stream().max(Comparator.naturalOrder())
                    .map(DAY_TABLE_FORMAT::parseLocalDate).orElseThrow();
            String minTableName = lastConvertedDate.minusDays(OLD_TABLES_COUNT).toString(DAY_TABLE_FORMAT);
            sourceTables.removeIf(s -> s.compareTo(minTableName) < 0);
        }

        // tables are converted only in production
        if (sourceTables.isEmpty() || YandexEnvironmentProvider.getEnvironmentType() != YandexEnvironmentType.PRODUCTION) {
            return result;
        }

        // one INSERT statement per table, all executed as a single YQL query
        YqlQueryBuilder qb = new YqlQueryBuilder();
        qb.cluster(convertedTablesDir);
        for (String table : sourceTables) {
            qb.appendText("INSERT INTO ").appendTable(YtPath.path(convertedTablesDir, table));
            qb.appendText("WITH TRUNCATE\n");
            qb.appendText("SELECT Domain, Turbo, SUM(TotalTime) * 100 as TotalTime, SUM(TotalCount) as TotalCount FROM (\n");
            qb.appendText("SELECT").appendFCall(YqlFunctions.cutWwwAndM("Yson::ConvertToString(Name)"));
            qb.appendText("._2 as Domain, WeakField(Turbo, 'Bool', false) as Turbo, \n");
            qb.appendText("Coalesce(ListSum(ListMap(");
            qb.appendFCall(YqlFunctions.parseProto(HOSTSPEED_HIST_MESSAGE, HOSTSPEED_PROTO_DESCRIPTOR, "WeakField(Hist, 'String')"));
            qb.appendText(".Values, ($entry) -> { return IF($entry.key > 300, 0, $entry.key * $entry.value); })), 0) as TotalTime,\n");
            qb.appendText("Coalesce(ListSum(ListMap(");
            qb.appendFCall(YqlFunctions.parseProto(HOSTSPEED_HIST_MESSAGE, HOSTSPEED_PROTO_DESCRIPTOR, "WeakField(Hist, 'String')"));
            qb.appendText(".Values, ($entry) -> { return IF($entry.key > 300, 0, $entry.value); })), 0) as TotalCount\n");
            qb.appendText("FROM").appendTable(YtPath.path(sourceDir, table + ".host"));
            qb.appendText("\nWHERE Yson::ConvertToInt64(EventType) == 3178 AND Yson::ConvertToString(DeviceType) == 'MOBILE'\n");
            qb.appendText(") GROUP BY Domain, Turbo;\n\n");
        }

        log.info("Converting tables: {}", sourceTables);

        yqlService.execute(qb.build());

        return result;
    }

    /**
     * Removes converted tables that come after the first {@link #OLD_TABLES_COUNT} entries when
     * the directory listing is sorted in reverse natural order. Does nothing outside of production.
     *
     * @param cypressService YT service used to list and remove nodes
     * @throws InterruptedException propagated from the YT calls
     */
    private void cleanOldTables(YtCypressService cypressService) throws InterruptedException {
        if (YandexEnvironmentProvider.getEnvironmentType() != YandexEnvironmentType.PRODUCTION) {
            log.info("Skipping cleanOldTables");
            return;
        }
        // presumably newest first, since the tables are named by date - verify against YtPath ordering
        cypressService.list(convertedTablesDir).stream().sorted(Comparator.reverseOrder()).skip(OLD_TABLES_COUNT)
                .forEach(cypressService::remove);
    }

    /** @return the type identifying this periodic task */
    @Override
    public PeriodicTaskType getType() {
        return PeriodicTaskType.IMPORT_TURBO_AUTOPARSED_HOSTS_SPEEDS;
    }

    /** @return the cron-based schedule of this task */
    @Override
    public TaskSchedule getSchedule() {
        return TaskSchedule.startByCron("0 42 * * * *");
    }

    /**
     * A row of the aggregated table: per-owner total time and visit count, split into
     * turbo and non-turbo visits. Missing numeric columns are treated as zero.
     */
    private static final class DomainSpeedInfoRow {

        private final String owner;
        private final long nonTurboTotalTime;
        private final long nonTurboTotalCount;
        private final long turboTotalTime;
        private final long turboTotalCount;

        @JsonCreator
        public DomainSpeedInfoRow(
                @JsonProperty("Owner") String owner,
                @JsonProperty("NonTurboTotalTime") Long nonTurboTotalTime,
                @JsonProperty("NonTurboTotalCount") Long nonTurboTotalCount,
                @JsonProperty("TurboTotalTime") Long turboTotalTime,
                @JsonProperty("TurboTotalCount") Long turboTotalCount) {
            this.owner = owner;
            this.nonTurboTotalTime = nonTurboTotalTime == null ? 0 : nonTurboTotalTime;
            this.nonTurboTotalCount = nonTurboTotalCount == null ? 0 : nonTurboTotalCount;
            this.turboTotalTime = turboTotalTime == null ? 0 : turboTotalTime;
            this.turboTotalCount = turboTotalCount == null ? 0 : turboTotalCount;
        }

        /**
         * Converts this row into the DAO entity.
         *
         * @param date day the entity is attributed to; stored as the start of that day
         * @return entity with average turbo and non-turbo values; an average is {@code null}
         *         when the corresponding visit count is below {@link #MIN_VISIT_COUNT}
         */
        public OwnerSpeedInfo toOwnerSpeedInfo(LocalDate date) {
            return new OwnerSpeedInfo(owner,
                    averageOrNull(nonTurboTotalTime, nonTurboTotalCount),
                    averageOrNull(turboTotalTime, turboTotalCount),
                    date.toDateTimeAtStartOfDay());
        }

        /** Returns {@code totalTime / totalCount}, or {@code null} if there are fewer than {@link #MIN_VISIT_COUNT} visits. */
        private static Double averageOrNull(long totalTime, long totalCount) {
            return totalCount < MIN_VISIT_COUNT ? null : totalTime / (double) totalCount;
        }
    }
}
