package ru.yandex.webmaster3.worker.feeds.statistics;

import java.util.ArrayList;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.stream.Collectors;

import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.text.StrSubstitutor;
import org.joda.time.LocalDate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;

import ru.yandex.webmaster3.core.feeds.feed.NativeFeedType;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskState;
import ru.yandex.webmaster3.core.worker.task.PeriodicTaskType;
import ru.yandex.webmaster3.core.worker.task.TaskResult;
import ru.yandex.webmaster3.storage.feeds.statistics.FeedsDomainEnrichmentYDao;
import ru.yandex.webmaster3.storage.feeds.statistics.FeedsDomainEnrichmentYDao.DomainEnrichmentInfo;
import ru.yandex.webmaster3.storage.host.CommonDataType;
import ru.yandex.webmaster3.storage.settings.SettingsService;
import ru.yandex.webmaster3.storage.settings.data.AbstractCommonDataState;
import ru.yandex.webmaster3.storage.util.yt.YtPath;
import ru.yandex.webmaster3.storage.util.yt.YtService;
import ru.yandex.webmaster3.storage.yql.YqlService;
import ru.yandex.webmaster3.worker.PeriodicTask;
import ru.yandex.webmaster3.worker.TaskSchedule;

/**
 * Periodic task that prepares show/click statistics for native feeds from user sessions.
 * <p>
 * One run processes at most one day of user sessions:
 * <ol>
 *     <li>creates today's copy of the offer base table under {@code workDir/offer_base_full} if it does not exist yet;</li>
 *     <li>picks the first unprocessed user-sessions table, i.e. the day after the date stored in
 *     {@link CommonDataType#LAST_PROCESSED_USER_SESSIONS_FEEDS}; if that table does not exist the run ends;</li>
 *     <li>picks the earliest offer base copy dated on or after the session date and removes copies that are too old;</li>
 *     <li>runs the YQL query that writes set/offer/enriched/not-enrichable statistics tables and returns
 *     per-domain enrichment counters, which are stored through {@link FeedsDomainEnrichmentYDao};</li>
 *     <li>stores one aggregated row per feed type (with an empty domain) and advances the last processed date.</li>
 * </ol>
 * <p>
 * NOTE(review): set-stats and offer-stats are filled with a plain {@code INSERT INTO} (no {@code WITH TRUNCATE}),
 * while the last processed date is advanced only at the very end of {@link #run(UUID)}. A retry after a failure
 * that happens past the YQL step presumably appends the same day twice - confirm.
 * <p>
 * Created by Oleg Bazdyrev on 11/03/2022.
 */
@Slf4j
@Service
@RequiredArgsConstructor(onConstructor_ = @Autowired)
public class PrepareFeedsClicksStatisticsTask extends PeriodicTask<PrepareFeedsClicksStatisticsTask.TaskState> {

    /** Offer base copies dated more than this many days before today are removed. */
    private static final int MAX_OFFER_BASE_TABLE_AGE = 14;
    /** Number of query rows accumulated before they are flushed to {@link FeedsDomainEnrichmentYDao}. */
    private static final int INSERT_BATCH_SIZE = 1000;
    /** Directory (relative to {@code workDir}) holding the dated copies of the offer base. */
    private static final String OFFER_BASE_DIR = "offer_base_full";
    /** Last processed date assumed when the corresponding setting has never been stored. */
    private static final LocalDate DEFAULT_LAST_PROCESSED_DATE = LocalDate.parse("2022-03-07");

    private final FeedsDomainEnrichmentYDao feedsDomainEnrichmentYDao;
    private final SettingsService settingsService;
    private final YqlService yqlService;
    private final YtService ytService;

    @Value("${webmaster3.worker.feeds.offer_base_full.path}")
    private final YtPath offerBaseTable;
    @Value("${webmaster3.worker.feeds.user_sessions.path}")
    private final YtPath userSessionsDir;
    @Value("${external.yt.service.hahn.root.default}/feeds/clicks")
    private final YtPath workDir;

    /**
     * Processes the first unprocessed day of user sessions (see the class description for the steps).
     *
     * @param runId identifier of this run (not used by the task itself)
     * @return {@code Result.SUCCESS} when there is no session table for the next date yet,
     * otherwise a result wrapping {@link TaskResult#SUCCESS}
     * @throws IllegalStateException if no offer base copy dated on or after the session date is available
     * @throws Exception             anything thrown by the underlying YT / YQL / storage services
     */
    @Override
    public Result run(UUID runId) throws Exception {
        setState(new TaskState());

        LocalDate today = LocalDate.now();
        // create copy of offer base full
        ensureOfferBaseCopy(today);

        // compact and copy user session to hahn (if needed)
        LocalDate lastProcessedDate = Optional.ofNullable(settingsService.getSettingOrNull(CommonDataType.LAST_PROCESSED_USER_SESSIONS_FEEDS))
                .map(AbstractCommonDataState::getValue)
                .map(LocalDate::parse)
                .orElse(DEFAULT_LAST_PROCESSED_DATE);
        // first unprocessed table
        LocalDate sessionDate = lastProcessedDate.plusDays(1);
        YtPath sessionTable = YtPath.path(userSessionsDir, sessionDate.toString());
        if (!ytService.withoutTransactionQuery(cypressService -> cypressService.exists(sessionTable))) {
            return Result.SUCCESS; // no data
        }

        // join session table and offer base
        // search for nearest offer base table (and drop old ones)
        LocalDate offerBaseDate = findOfferBaseDate(today, sessionDate);
        if (offerBaseDate == null) {
            // Without this check the literal "null" would end up in the table path passed to YQL.
            throw new IllegalStateException("No offer base copy dated " + sessionDate + " or later found in "
                    + YtPath.path(workDir, OFFER_BASE_DIR));
        }

        Map<String, String> subsMap = Map.of(
                "SRC_SESSIONS", sessionTable.toYqlPath(),
                "SRC_OFFER_BASE", YtPath.path(workDir, OFFER_BASE_DIR + "/" + offerBaseDate).toYqlPath(),
                "DST_SET_STATS", YtPath.path(workDir, "set-stats").toYqlPath(),
                "DST_OFFER_STATS", YtPath.path(workDir, "offer-stats").toYqlPath(),
                "DST_ENRICHED_STATS", YtPath.path(workDir, "enriched-stats/" + sessionDate).toYqlPath(),
                "DST_NOT_ENRICHABLE_STATS", YtPath.path(workDir, "not-enrichable-stats/" + sessionDate).toYqlPath(),
                "DATE", sessionDate.toString()
        );
        // expose the substitutions through the task state
        state.subsMap = subsMap;

        EnumMap<NativeFeedType, EnrichmentStats> totalStats = collectDomainEnrichment(new StrSubstitutor(subsMap), sessionDate);
        // total stats
        storeTotals(totalStats, sessionDate);
        settingsService.update(CommonDataType.LAST_PROCESSED_USER_SESSIONS_FEEDS, sessionDate.toString());

        return new Result(TaskResult.SUCCESS);
    }

    /**
     * Creates the copy of the offer base table for the given day unless it already exists.
     * The copy keeps only {@code url} ({@code url_with_utm} when present), {@code sets} and {@code feed_url}.
     *
     * @param today date used as the name of the copy
     * @throws Exception anything thrown by the YT / YQL services
     */
    private void ensureOfferBaseCopy(LocalDate today) throws Exception {
        YtPath offerBaseCopyPath = YtPath.path(workDir, OFFER_BASE_DIR + "/" + today);
        ytService.withoutTransaction(cypressService -> {
            if (!cypressService.exists(offerBaseCopyPath)) {
                StrSubstitutor substitutor = new StrSubstitutor(Map.of(
                        "SRC_TABLE", offerBaseTable.toYqlPath(),
                        "DST_TABLE", offerBaseCopyPath.toYqlPath()
                ));
                yqlService.execute(substitutor.replace("""
                    use hahn;
                    INSERT INTO ${DST_TABLE} WITH TRUNCATE
                    SELECT nvl(url_with_utm, url) as url, `sets`, feed_url FROM ${SRC_TABLE}
                    ORDER BY url;"""));
            }
            return true;
        });
    }

    /**
     * Walks over the existing offer base copies, removes the ones dated earlier than
     * {@code today - MAX_OFFER_BASE_TABLE_AGE} days and picks the first remaining copy (in sorted order)
     * that is not dated before the session date.
     *
     * @param today       current date, the reference point for the age limit
     * @param sessionDate date of the user-sessions table being processed
     * @return date of the chosen copy or {@code null} when there is no suitable one
     * @throws Exception anything thrown by the YT service
     */
    private LocalDate findOfferBaseDate(LocalDate today, LocalDate sessionDate) throws Exception {
        LocalDate oldestKeptDate = today.minusDays(MAX_OFFER_BASE_TABLE_AGE);
        return ytService.withoutTransactionQuery(cypressService -> {
            LocalDate nearest = null;
            List<YtPath> copies = cypressService.list(YtPath.path(workDir, OFFER_BASE_DIR));
            // null comparator = natural ordering of YtPath; presumably this orders the date-named tables
            // chronologically, which the "first suitable" selection below relies on - TODO confirm
            copies.sort(null);
            for (YtPath copy : copies) {
                LocalDate copyDate = LocalDate.parse(copy.getName());
                if (copyDate.isBefore(oldestKeptDate)) {
                    cypressService.remove(copy);
                } else if (nearest == null && !copyDate.isBefore(sessionDate)) {
                    nearest = copyDate;
                }
            }
            return nearest;
        });
    }

    /**
     * Runs the main YQL query. The query writes the statistics tables named in the substitutions and
     * returns per-domain, per-type enrichment counters; every returned row is stored through
     * {@link FeedsDomainEnrichmentYDao} in batches and is also added to the per-type totals.
     *
     * @param substitutor substitutor holding the table paths and the date for the query placeholders
     * @param sessionDate date the stored rows are attributed to
     * @return totals per feed type: sums of enrichable / enriched values and the number of rows
     * @throws Exception anything thrown by the YQL service or the DAO
     */
    private EnumMap<NativeFeedType, EnrichmentStats> collectDomainEnrichment(StrSubstitutor substitutor, LocalDate sessionDate)
            throws Exception {
        List<DomainEnrichmentInfo> batch = new ArrayList<>();
        EnumMap<NativeFeedType, EnrichmentStats> totalStats = new EnumMap<>(NativeFeedType.class);
        yqlService.query(substitutor.replace("""
                use hahn;

                $sessionsDeduplicated = (
                    SELECT ReqID, Position,
                        some(Clicks) as Clicks,
                        some(CostPlusBlockStatistics) as CostPlusBlockStatistics,
                        some(DataAvailableCostPlus2) as DataAvailableCostPlus,
                        some(DocMarkersCostPlus) as DocMarkersCostPlus,
                        some(DocMarkersSlices) as DocMarkersSlices,
                        some(Host) as Host,
                        some(Path) as Path,
                        some(Shows) as Shows
                    FROM ${SRC_SESSIONS}
                    WHERE DataAvailableCostPlus2 IS NOT NULL
                        AND RequestSource in (0,1,2,3,4,5,6) AND ResultSource in (0, 2) AND IsMetrikaRobot IS NULL
                    GROUP BY ReqID, Position
                );

                $usSets = (
                    SELECT  SetUrl,
                            DataAvailableCostPlus,
                            DocMarkersCostPlus,
                            count(*) as Shows,
                            sum(CostPlusBlockStatistics.Set.Clicks) as Clicks,
                            sum(CostPlusBlockStatistics.MoreLink.Clicks) as MoreLinkClicks,
                            sum(ListSum(ListMap(CostPlusBlockStatistics.Offers, ($o) -> { return $o.Clicks;}))) as OfferClicks
                    FROM $sessionsDeduplicated
                    WHERE CostPlusBlockStatistics.Set is not null
                    GROUP BY CostPlusBlockStatistics.Set.Host || CostPlusBlockStatistics.Set.Path as SetUrl, DataAvailableCostPlus, DocMarkersCostPlus
                );

                $obSets = (
                    SELECT Domain, Type, SetUrl
                    FROM
                    (
                        SELECT ListMap(Yson::ConvertToList(`sets`), ($s) -> { return Yson::ConvertToString($s.url);}) as `Sets`, nf.domain as Domain,
                            nf.type as Type
                        FROM ${SRC_OFFER_BASE} as obf
                        INNER JOIN `//home/webmaster/prod/export/feeds/native_feeds` as nf
                        ON obf.feed_url == nf.url
                    )
                    FLATTEN LIST BY `Sets` as SetUrl
                    GROUP BY Domain, Type, SetUrl
                );

                $obDomains = (
                    SELECT Domain, Type, Host
                    FROM ${SRC_OFFER_BASE} as obf
                    INNER JOIN `//home/webmaster/prod/export/feeds/native_feeds` as nf
                    ON obf.feed_url == nf.url
                    GROUP BY nf.domain as Domain, nf.type as Type, Url::GetSchemeHostPort(obf.url) as Host
                );

                $usOffers = (
                    SELECT OfferUrl, SetUrl, sum(Shows) as Shows, sum(Clicks) as Clicks
                    FROM
                    (
                        SELECT
                            (of.Host || of.Path) as OfferUrl,
                            (CostPlusBlockStatistics.Set.Host || CostPlusBlockStatistics.Set.Path) as SetUrl,
                            1 as Shows,
                            of.Clicks as Clicks
                        FROM $sessionsDeduplicated
                        FLATTEN LIST BY CostPlusBlockStatistics.Offers as of
                    )
                    GROUP BY OfferUrl, SetUrl
                );

                INSERT INTO ${DST_SET_STATS}
                SELECT Date('${DATE}') as `Date`, us.*, os.Domain as Domain, os.Type as Type from $usSets as us
                INNER JOIN $obSets as os
                ON String::AsciiToLower(us.SetUrl) == String::AsciiToLower(os.SetUrl)
                ORDER BY Domain, Type, `Date`, SetUrl;

                INSERT INTO ${DST_OFFER_STATS}
                SELECT Date('${DATE}') as `Date`, us.*, os.Domain as Domain, os.Type as Type  from $usOffers as us
                INNER JOIN $obSets as os
                ON String::AsciiToLower(us.SetUrl) == String::AsciiToLower(os.SetUrl)
                ORDER BY Domain, Type, `Date`, OfferUrl, SetUrl;

                $enrichedShows = (
                    SELECT SetUrl, count(*) as Shows
                    FROM ${SRC_SESSIONS}
                    WHERE DocMarkersCostPlus IS NOT NULL
                    GROUP BY CostPlusBlockStatistics.Set.Host || CostPlusBlockStatistics.Set.Path as SetUrl
                );

                $notEnrichableShows = (
                    SELECT Host, Path, count(*) as Shows
                    FROM ${SRC_SESSIONS}
                    WHERE DataAvailableCostPlus2 IS NULL
                    GROUP BY Host, Path
                );

                INSERT INTO ${DST_ENRICHED_STATS} WITH TRUNCATE
                SELECT Domain, SetUrl, Shows, Type, TopPosition FROM
                (
                    SELECT os.Domain AS Domain, ss.SetUrl AS SetUrl, ss.Shows AS Shows, os.Type as Type, ROW_NUMBER() OVER w AS TopPosition FROM\s
                    $enrichedShows AS ss
                    INNER JOIN $obSets AS os
                    ON ss.SetUrl == os.SetUrl
                    WINDOW w AS (
                        PARTITION BY os.Domain
                        ORDER BY ss.Shows DESC
                    )
                )
                WHERE TopPosition < 10100
                ORDER BY Domain, TopPosition;

                INSERT INTO ${DST_NOT_ENRICHABLE_STATS} WITH TRUNCATE
                SELECT Domain, Url, Shows, TopPosition FROM
                (
                    SELECT od.Domain AS Domain, (nes.Host || nes.Path) as Url, nes.Shows AS Shows, ROW_NUMBER() OVER w AS TopPosition FROM
                    $notEnrichableShows AS nes
                    INNER JOIN $obDomains AS od
                    ON nes.Host == od.Host
                    WINDOW w AS (
                        PARTITION BY od.Domain
                        ORDER BY nes.Shows DESC
                    )
                )
                WHERE TopPosition < 10100
                ORDER BY Domain, TopPosition;

                $availableAndEnriched = (
                    SELECT Host,
                        count_if(DocMarkersCostPlus IS NOT NULL) as Enriched,
                        count_if(DataAvailableCostPlus IS NOT NULL) as Enrichable
                    FROM $sessionsDeduplicated
                    WHERE DataAvailableCostPlus IS NOT NULL
                    GROUP BY Host
                );

                $captureThematics = Re2::Capture('([^:]+):([^:]+):(.*)');

                SELECT Domain, Type,
                    SUM_IF(Shows, DataAvailableCostPlus is not null) as Enrichable,
                    NVL(SUM_IF(Shows, DocMarkersCostPlus is not null), 0) as Enriched
                FROM $sessionsDeduplicated AS us
                INNER JOIN $obDomains AS od
                ON us.Host == od.Host
                GROUP BY od.Domain as Domain, $captureThematics(us.DataAvailableCostPlus)._2 as Type

                """), rs -> rs, rs -> {
            DomainEnrichmentInfo info = new DomainEnrichmentInfo(rs.getString("Domain"), NativeFeedType.fromCode(rs.getString("Type")), sessionDate,
                    rs.getLong("Enrichable"), rs.getLong("Enriched"), 0L);
            batch.add(info);
            // NOTE(review): EnumMap rejects null keys; if NativeFeedType.fromCode can return null
            // for an unknown code this line throws NullPointerException - confirm
            totalStats.computeIfAbsent(info.getType(), k -> new EnrichmentStats()).add(info.getEnrichable(), info.getEnriched(), 1L);
            if (batch.size() >= INSERT_BATCH_SIZE) {
                feedsDomainEnrichmentYDao.insert(batch);
                batch.clear();
            }
        });
        // flush the last, incomplete batch
        if (!batch.isEmpty()) {
            feedsDomainEnrichmentYDao.insert(batch);
        }
        return totalStats;
    }

    /**
     * Stores one aggregated row per feed type. Such rows have an empty domain and carry the number of
     * aggregated per-domain rows in the last constructor argument.
     *
     * @param totalStats  totals per feed type
     * @param sessionDate date the rows are attributed to
     * @throws Exception anything thrown by the DAO
     */
    private void storeTotals(Map<NativeFeedType, EnrichmentStats> totalStats, LocalDate sessionDate) throws Exception {
        List<DomainEnrichmentInfo> total = totalStats.entrySet().stream()
                .map(entry -> {
                    EnrichmentStats stats = entry.getValue();
                    return new DomainEnrichmentInfo("", entry.getKey(), sessionDate, stats.enrichable, stats.enriched, stats.count);
                })
                .collect(Collectors.toList());
        feedsDomainEnrichmentYDao.insert(total);
    }

    /** Mutable accumulator of enrichment counters for a single feed type. */
    private static final class EnrichmentStats {
        long enrichable = 0;
        long enriched = 0;
        long count = 0;

        /** Adds the given values to the corresponding counters. */
        public void add(long enrichable, long enriched, long count) {
            this.enrichable += enrichable;
            this.enriched += enriched;
            this.count += count;
        }
    }

    /** State of the task: the substitutions (table paths and date) used for the main query of the current run. */
    public static class TaskState implements PeriodicTaskState {
        public Map<String, String> subsMap;
    }

    @Override
    public PeriodicTaskType getType() {
        return PeriodicTaskType.PREPARE_FEEDS_CLICKS_STATISTICS;
    }

    @Override
    public TaskSchedule getSchedule() {
        // presumably every third hour starting from 08:00 - depends on the cron dialect of TaskSchedule, confirm
        return TaskSchedule.startByCron("0 0 8/3 * * *");
    }
}
