package ru.yandex.direct.jobs.contentcategories;

import java.math.BigInteger;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;

import javax.annotation.ParametersAreNonnullByDefault;

import com.google.common.collect.ImmutableMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;

import ru.yandex.direct.ansiblejuggler.model.notifications.NotificationMethod;
import ru.yandex.direct.config.DirectConfig;
import ru.yandex.direct.core.entity.ppcproperty.model.PpcPropertyEnum;
import ru.yandex.direct.env.NonDevelopmentEnvironment;
import ru.yandex.direct.env.ProductionOnly;
import ru.yandex.direct.juggler.JugglerStatus;
import ru.yandex.direct.juggler.check.annotation.JugglerCheck;
import ru.yandex.direct.juggler.check.annotation.OnChangeNotification;
import ru.yandex.direct.juggler.check.model.CheckTag;
import ru.yandex.direct.juggler.check.model.NotificationRecipient;
import ru.yandex.direct.scheduler.Hourglass;
import ru.yandex.direct.scheduler.support.DirectJob;
import ru.yandex.direct.tracing.Trace;
import ru.yandex.direct.ytwrapper.YtPathUtil;
import ru.yandex.direct.ytwrapper.client.YtProvider;
import ru.yandex.direct.ytwrapper.model.YqlQuery;
import ru.yandex.direct.ytwrapper.model.YtCluster;
import ru.yandex.direct.ytwrapper.model.YtSQLSyntaxVersion;
import ru.yandex.inside.yt.kosher.cypress.Cypress;
import ru.yandex.inside.yt.kosher.cypress.CypressNodeType;
import ru.yandex.inside.yt.kosher.cypress.YPath;
import ru.yandex.misc.io.ClassPathResourceInputStreamSource;

import static java.util.Collections.emptyList;
import static java.util.stream.Collectors.toList;
import static ru.yandex.direct.juggler.check.model.CheckTag.DIRECT_PRIORITY_1;

/**
 * Job that prepares URLs for Toloka labelling.
 * <p>
 * On every run, for each categories group from {@link #KEYWORDS_BY_CATEGORIES_GROUP}, the job looks for the
 * oldest {@code raw_urls_<ts>} table in the configured raw URLs folder that has no matching
 * {@code input_bs_<ts>} table in the group's input folder, and runs the YQL query from
 * {@code contentcategories/prepare_toloka_input_table.sql} to produce that input table.
 * At most one raw table is handled per group per run.
 */
@JugglerCheck(ttl = @JugglerCheck.Duration(hours = 4),
        tags = {DIRECT_PRIORITY_1, CheckTag.DIRECT_PRODUCT_TEAM, CheckTag.YT},
        notifications = {
                @OnChangeNotification(recipient = NotificationRecipient.LOGIN_AMMSAID,
                        status = {JugglerStatus.OK, JugglerStatus.CRIT},
                        method = NotificationMethod.TELEGRAM),
        },
        needCheck = ProductionOnly.class
)
@Hourglass(cronExpression = "0 50 * * * ?", needSchedule = NonDevelopmentEnvironment.class)
@ParametersAreNonnullByDefault
public class ContentCategoriesTolokaInputPrepareJob extends DirectJob {

    private static final Logger logger = LoggerFactory.getLogger(ContentCategoriesTolokaInputPrepareJob.class);
    private static final YtCluster YT_CLUSTER = YtCluster.HAHN;
    private static final String PREPARE_TOLOKA_INPUT_QUERY = String.join("\n",
            new ClassPathResourceInputStreamSource("contentcategories/prepare_toloka_input_table.sql").readLines());

    /** Names of the source tables in the raw URLs folder; the numeric suffix is the table timestamp. */
    private static final Pattern RAW_URLS_TABLE_NAME = Pattern.compile("^raw_urls_[0-9]+$");
    /** Names of the already prepared tables in a group's input folder; the suffix mirrors the raw table's. */
    private static final Pattern INPUT_BS_TABLE_NAME = Pattern.compile("^input_bs_[0-9]+$");

    /**
     * Orders timestamp suffixes by numeric value. Plain string comparison would put "999" after "1000"
     * as soon as two suffixes differ in length.
     */
    private static final Comparator<String> NUMERIC_TIMESTAMP_ORDER =
            Comparator.comparing((String timestamp) -> new BigInteger(timestamp));

    /**
     * Categories group name (also the name of the group's sub-folder under the input path) mapped to the
     * keyword ids that are passed to the query as a comma-separated list.
     */
    private static final Map<String, List<String>> KEYWORDS_BY_CATEGORIES_GROUP = ImmutableMap.<String, List<String>>builder()
            .put("tobacco_video-games_gambling_alcohol", List.of("65536", "4096", "256", "32768"))
            .put("negative_profanity_childrens_adult", List.of("8", "32", "128", "2"))
            .put("weapons_sharing_terrorism_contravenes-legislation", List.of("16", "1024", "4", "1"))
            .put("religion_occultism_medical_dating", List.of("2048", "131072", "16384", "8192"))
            .put("tragedy_politics_news", List.of("262144", "64", "512"))
            .put("auto_business_travel_home", List.of("4294968296", "4294968297", "4294968298", "4294968302"))
            .put("pets_food_law_games", List.of("4294968305", "4294968308", "4294968313", "4294968314"))
            .put("internet_arts_books", List.of("4294968317", "4294968318", "4294968319"))
            .put("electronics_beauty_society_science", List.of("4294968323", "4294968324", "4294968328", "4294968329"))
            .put("estate_news_job_education", List.of("4294968331", "4294968332", "4294968333", "4294968334"))
            .put("sport_family_finance_hobby_shopping", List.of("4294968335", "4294968349", "4294968341", "4294968342", "4294968345"))
            .build();

    private final YtProvider ytProvider;
    private final ContentCategoriesService contentCategoriesService;
    private final DirectConfig tolokaConfig;

    /**
     * @param ytProvider               source of the YT operator and cluster config
     * @param contentCategoriesService used to resolve the effective URLs limit
     * @param directConfig             root config; only the {@code content_categories.toloka} branch is kept
     */
    @Autowired
    public ContentCategoriesTolokaInputPrepareJob(
            YtProvider ytProvider,
            ContentCategoriesService contentCategoriesService,
            DirectConfig directConfig
    ) {
        this.ytProvider = ytProvider;
        this.contentCategoriesService = contentCategoriesService;
        this.tolokaConfig = directConfig.getBranch("content_categories").getBranch("toloka");
    }

    /**
     * Prepares one new Toloka input table per categories group, if there is an unprocessed raw table.
     * <p>
     * Returns early (after logging an error) when the raw URLs folder or the executed projects table is
     * missing. A group whose input folder cannot be listed is skipped for this run instead of being treated
     * as "nothing prepared yet", so an already prepared raw table is not processed again.
     */
    @Override
    public void execute() {
        var ytOperator = ytProvider.getOperator(YT_CLUSTER, YtSQLSyntaxVersion.SQLv1);
        var ytClusterConfig = ytProvider.getClusterConfig(YT_CLUSTER);

        String rawUrlsFolder = tolokaConfig.getString("raw_urls_folder");
        var cypress = ytOperator.getYt().cypress();

        var rawUrlsYPath = YPath.simple(rawUrlsFolder);
        if (!cypress.exists(rawUrlsYPath)) {
            logger.error("Yt folder {} not found", rawUrlsFolder);
            return;
        }

        // A failed listing of the source folder simply leaves nothing to process in this run.
        List<String> rawUrlsTableTimestamps =
                listTableTimestamps(cypress, rawUrlsYPath, RAW_URLS_TABLE_NAME).orElse(emptyList());

        String tolokaInputPath = tolokaConfig.getString("input_path");
        String executedProjectsTablePath = tolokaConfig.getString("executed_projects_table");
        if (!cypress.exists(YPath.simple(executedProjectsTablePath))) {
            logger.error("Yt table {} not found", executedProjectsTablePath);
            return;
        }

        Long defaultDayLimit = tolokaConfig.getLong("url_day_limit");
        Long rowsLimit = contentCategoriesService.getUrlsLimit(
                PpcPropertyEnum.JOBS_CONTENT_CATEGORIES_TOLOKA_URLS_DAY_LIMIT, defaultDayLimit);

        for (var entry : KEYWORDS_BY_CATEGORIES_GROUP.entrySet()) {
            String categoriesGroup = entry.getKey();
            var categoriesGroupKeywords = entry.getValue();

            String tolokaInputFolder = YtPathUtil.generatePath(tolokaInputPath, categoriesGroup);
            YPath tolokaInputYPath = YPath.simple(tolokaInputFolder);
            if (!cypress.exists(tolokaInputYPath)) {
                cypress.create(tolokaInputYPath, CypressNodeType.MAP, true);
                logger.info("Yt folder {} created", tolokaInputFolder);
            }

            Optional<List<String>> preparedTimestamps =
                    listTableTimestamps(cypress, tolokaInputYPath, INPUT_BS_TABLE_NAME);
            if (!preparedTimestamps.isPresent()) {
                // Without the list of prepared tables every raw table would look new; do not guess.
                logger.warn("Skipping categories group {}: can't determine already prepared tables in {}",
                        categoriesGroup, tolokaInputFolder);
                continue;
            }

            // Set lookup keeps the filter below linear in the number of raw tables.
            Set<String> alreadyPrepared = new HashSet<>(preparedTimestamps.get());
            var newRawUrlsTs = rawUrlsTableTimestamps.stream()
                    .filter(ts -> !alreadyPrepared.contains(ts))
                    .min(NUMERIC_TIMESTAMP_ORDER);

            if (newRawUrlsTs.isPresent()) {
                String ts = newRawUrlsTs.get();
                String rawUrlsTable = YtPathUtil.generatePath(rawUrlsFolder, "raw_urls_" + ts);
                String inputBsTable = YtPathUtil.generatePath(tolokaInputFolder, "input_bs_" + ts);
                String categoriesList = String.join(",", categoriesGroupKeywords);
                try (var ignore = Trace.current().profile("content_categories:yql", "toloka_input_prepare:" + categoriesGroup)) {
                    ytOperator.yqlExecute(new YqlQuery(PREPARE_TOLOKA_INPUT_QUERY, ytClusterConfig.getUser(),
                            rawUrlsTable, inputBsTable, executedProjectsTablePath, categoriesList, categoriesGroup,
                            rowsLimit)
                                    .withTitle("content_categories:toloka_input_prepare:" + categoriesGroup)
                    );

                    logger.info("Toloka input data for raw_urls_{} loaded to {}", ts, inputBsTable);
                }
            }
        }
    }

    /**
     * Lists the nodes of {@code folderPath} and extracts the timestamp suffix (the third
     * {@code _}-separated token) from every name that fully matches {@code tableNamePattern}.
     *
     * @param cypress          Cypress client used for listing
     * @param folderPath       folder to list
     * @param tableNamePattern pattern a node name must fully match to be taken into account
     * @return the extracted timestamps (possibly empty), or {@link Optional#empty()} when listing failed,
     * so that callers can tell "no tables" from "unknown"
     */
    private static Optional<List<String>> listTableTimestamps(Cypress cypress, YPath folderPath,
                                                              Pattern tableNamePattern) {
        try {
            var timestamps = cypress.list(folderPath)
                    .stream()
                    .map(table -> table.stringValue())
                    .filter(tableName -> tableNamePattern.matcher(tableName).matches())
                    .map(tableName -> tableName.split("_")[2])
                    .collect(toList());
            logger.info("Got timestamp list from {}: {}", folderPath, timestamps);
            return Optional.of(timestamps);
        } catch (Exception e) {
            // Best effort: the caller decides how to degrade; the trailing throwable keeps the stack trace.
            logger.warn("Can't get table list from {}", folderPath, e);
            return Optional.empty();
        }
    }
}
