package ru.yandex.chemodan.app.dataapi.worker.dump.full;

import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

import net.jodah.failsafe.RetryPolicy;
import org.joda.time.DateTime;
import org.joda.time.LocalDate;

import ru.yandex.bolts.collection.Cf;
import ru.yandex.bolts.collection.ListF;
import ru.yandex.bolts.collection.Option;
import ru.yandex.chemodan.app.dataapi.api.data.record.CollectionRef;
import ru.yandex.chemodan.app.dataapi.api.db.ref.AppDatabaseRef;
import ru.yandex.chemodan.app.dataapi.core.dump.DatabaseChangesTskvFields;
import ru.yandex.chemodan.app.dataapi.utils.YtPathsUtils;
import ru.yandex.chemodan.util.retry.RetryManager;
import ru.yandex.chemodan.util.yt.IncrementalLogMrYtRunner;
import ru.yandex.commune.dynproperties.DynamicProperty;
import ru.yandex.inside.yt.kosher.Yt;
import ru.yandex.inside.yt.kosher.cypress.YPath;
import ru.yandex.inside.yt.kosher.impl.ytree.YTreeStringNodeImpl;
import ru.yandex.inside.yt.kosher.operations.Operation;
import ru.yandex.inside.yt.kosher.operations.specs.CommandSpec;
import ru.yandex.inside.yt.kosher.operations.specs.MapReduceSpec;
import ru.yandex.inside.yt.kosher.tables.types.JacksonTableEntryType;
import ru.yandex.misc.lang.StringUtils;
import ru.yandex.misc.log.mlf.Logger;
import ru.yandex.misc.log.mlf.LoggerFactory;

import static ru.yandex.chemodan.app.dataapi.utils.YtPathsUtils.YT_NODE_NAME_FORMATTER;
import static ru.yandex.chemodan.app.dataapi.worker.dump.full.FullDumpDatabaseUsersProcessor.DATASYNC_FULL_DUMP_YT_PATH_PREFIX;
import static ru.yandex.chemodan.app.dataapi.worker.dump.full.YtDumpAndChangesMergeScriptManager.MERGE_SCRIPT_YPATH;

/**
 * Merges the current full dump table of a database collection in YT with database changes log tables.
 * <p>
 * The merge runs the {@code merge_dump_and_logs.py} script as a map-reduce over the dump table plus the
 * selected changes tables, writes the result into a temporary table next to the dump, sorts it and then
 * moves it to the dump path.
 *
 * @author metal
 */
public class YtDumpAndChangesMerger {
    private static final Logger logger = LoggerFactory.getLogger(YtDumpAndChangesMerger.class);

    /** Suffix appended to the dump table name to build the temporary merge output table name. */
    private static final String MERGE_TMP_TABLE_SUFFIX = "-MERGE-TMP";

    /** Key columns used for sort-by/reduce-by in the map-reduce and for the final sort of the merged table. */
    private static final ListF<String> MERGE_KEY_COLUMNS = Cf.list("uid", "record_id");

    /** YT folder that holds the database changes log tables. */
    private final DynamicProperty<String> databaseChangesLogLocationInYt =
            new DynamicProperty<>("dataapi-database-changes-log-location-in-yt", "//statbox/ydisk-dataapi-database-changes-log");
    /** Explicit log table names to merge; when non-empty it overrides the automatic table selection. */
    private final DynamicProperty<ListF<String>> databaseChangesTableNamesToManualMerge =
            new DynamicProperty<>("dataapi-database-changes-manual-merge", Cf.list());

    private final Yt yt;
    private final RetryPolicy ytRetryPolicy;

    /**
     * @param yt YT client used for Cypress requests and operations
     * @param ytRetryPolicy retry policy applied to every YT interaction made by this class
     */
    public YtDumpAndChangesMerger(Yt yt, RetryPolicy ytRetryPolicy) {
        this.yt = yt;
        this.ytRetryPolicy = ytRetryPolicy;
    }

    /**
     * Merges the dump of the collection identified by {@code collectionRef} with the changes logs.
     *
     * @param collectionRef reference providing the application id, database id and collection id
     */
    public void mergeForDatabase(CollectionRef collectionRef) {
        mergeForDatabase(collectionRef.dbAppId(), collectionRef.databaseId(), collectionRef.collectionId);
    }

    /**
     * Merges the current dump of the given collection with the changes log tables selected by
     * {@link #getChangesPathsInYt(YPath)}. Does nothing except logging when no log tables were selected.
     *
     * @param applicationId application id of the database
     * @param databaseId database id
     * @param collectionId collection id
     */
    public void mergeForDatabase(String applicationId, String databaseId, String collectionId) {
        YPath dumpFolder = YtPathsUtils.getProperYPath(DATASYNC_FULL_DUMP_YT_PATH_PREFIX,
                new AppDatabaseRef(applicationId, databaseId), Option.of(collectionId));
        YPath dumpPath = YtPathsUtils.getProperYPathForCurrentDump(dumpFolder);

        ListF<YPath> changesPaths = getChangesPathsInYt(dumpFolder);
        if (changesPaths.isEmpty()) {
            logger.info("Doesn't have any recent database changes logs for merging " +
                    applicationId + "." + databaseId + "." + collectionId);
            return;
        }

        mergeWithRetries(dumpPath, changesPaths, applicationId, databaseId, collectionId);
    }

    /**
     * Runs {@link #merge} through the shared retry manager with a log message naming the dump table.
     */
    private void mergeWithRetries(YPath dumpPath, ListF<YPath> ypaths,
            String applicationId, String databaseId, String collectionId)
    {
        // Uses the same typed helper as the other YT calls instead of a raw RetryManager.
        this.<Void>withRetries()
                .withLogging("Applying databases changes for yt database dump: " + dumpPath)
                .runSafe(() -> merge(dumpPath, ypaths, applicationId, databaseId, collectionId));
    }

    /**
     * Performs one merge attempt: map-reduce into the temporary table, sort it in place, move it to the dump path.
     *
     * @param dumpPath current dump table
     * @param ypaths changes log tables to merge into the dump
     */
    private void merge(YPath dumpPath, ListF<YPath> ypaths,
            String applicationId, String databaseId, String collectionId)
    {
        MapReduceSpec mapReduceSpec = getMergeSpec(dumpPath, ypaths, applicationId, databaseId, collectionId);

        // Remove the temporary table if it is still there, e.g. from an earlier attempt.
        YPath outputTable = getMergeOutputTable(dumpPath);
        prepareOutputTable(outputTable);

        Operation mapReduce = yt.operations().mapReduceAndGetOp(mapReduceSpec);
        mapReduce.awaitAndThrowIfNotSuccess();

        Operation sort = yt.operations().sortAndGetOp(outputTable, outputTable, MERGE_KEY_COLUMNS);
        sort.awaitAndThrowIfNotSuccess();

        // NOTE(review): the boolean flags are positional; presumably they make the move overwrite the
        // existing dump table - confirm against the Cypress move signature.
        yt.cypress().move(outputTable, dumpPath, true, true, false);
    }

    /**
     * Builds the map-reduce specification: the dump and changes tables as input, the temporary table as output,
     * the merge script as both mapper and reducer, keyed by {@link #MERGE_KEY_COLUMNS}.
     * The output table gets a {@code generate_date} attribute set to the current local date.
     */
    private MapReduceSpec getMergeSpec(YPath dumpPath, ListF<YPath> ypaths,
            String applicationId, String databaseId, String collectionId)
    {
        String scriptParameters = constructMapScriptParameters(applicationId, databaseId, collectionId);
        CommandSpec mapCommandSpec = buildScriptCommandSpec("python merge_dump_and_logs.py map " + scriptParameters);
        CommandSpec reduceCommandSpec = buildScriptCommandSpec("python merge_dump_and_logs.py reduce");

        ListF<YPath> inputTables = getMergeInputTables(dumpPath, ypaths);
        YPath outputTable = getMergeOutputTable(dumpPath);

        return MapReduceSpec.builder()
                .setInputTables(inputTables)
                .setOutputTables(Cf.list(outputTable))
                .setMapperSpec(mapCommandSpec)
                .setSortBy(MERGE_KEY_COLUMNS)
                .setReduceBy(MERGE_KEY_COLUMNS)
                .setReducerSpec(reduceCommandSpec)
                .setOutputTableAttributes(Cf.map("generate_date", new YTreeStringNodeImpl(LocalDate.now().toString(), Cf.map())))
                .build();
    }

    /**
     * Builds a command spec that runs {@code command} with Jackson table entries as input and output
     * and the merge script attached as a file.
     */
    private CommandSpec buildScriptCommandSpec(String command) {
        return CommandSpec.builder()
                .setCommand(command)
                .setInputType(new JacksonTableEntryType())
                .setOutputType(new JacksonTableEntryType())
                .setFiles(Cf.list(MERGE_SCRIPT_YPATH))
                .build();
    }

    /** Removes the given table if it exists. */
    private void prepareOutputTable(YPath output) {
        if (yt.cypress().exists(output)) {
            yt.cypress().remove(output);
        }
    }

    /**
     * Returns the temporary output table: a sibling of the dump table named with
     * {@link #MERGE_TMP_TABLE_SUFFIX}, carrying the compression attributes of {@link IncrementalLogMrYtRunner}.
     */
    private YPath getMergeOutputTable(YPath dumpPath) {
        return dumpPath.parent()
                .child(dumpPath.name() + MERGE_TMP_TABLE_SUFFIX)
                .withAdditionalAttributes(IncrementalLogMrYtRunner.COMPRESSION_ATTRIBUTES);
    }

    /** Returns the map-reduce inputs: the dump table first, followed by the changes tables. */
    private ListF<YPath> getMergeInputTables(YPath dumpPath, ListF<YPath> ypaths) {
        ListF<YPath> inputTables = Cf.arrayList(dumpPath);
        inputTables.addAll(ypaths);
        return inputTables;
    }

    /**
     * Joins the ids with single spaces to form the mapper script arguments.
     */
    private String constructMapScriptParameters(String applicationId, String databaseId, String collectionId) {
        // NOTE(review): the ids are concatenated unquoted into the command line; an id containing whitespace
        // or shell metacharacters would change the command - confirm ids are validated upstream.
        return StringUtils.join(Cf.list(applicationId, databaseId, collectionId), " ");
    }

    /** Creates a retry manager configured with the YT retry policy of this merger. */
    private <T> RetryManager<T> withRetries() {
        return new RetryManager<T>().withRetryPolicy(ytRetryPolicy);
    }

    /** Reads the {@code row_count} attribute of the given table, with retries. */
    private long getRowCount(YPath path) {
        return this.<Long>withRetries().get(() -> yt.cypress().get(path.attribute("row_count")).longValue());
    }

    /**
     * Lists the names of the children of {@code folder} in ascending order, with retries.
     *
     * @return sorted child names, or an empty list when the folder does not exist
     */
    private ListF<String> listSortedChildNames(YPath folder) {
        return Cf.wrap(this.<List<String>>withRetries().get(() -> yt.cypress().exists(folder)
                ? yt.cypress().get(folder).asMap().keySet().stream().sorted().collect(Collectors.toList())
                : Collections.emptyList()));
    }

    /**
     * Finds the newest dump table with a non-zero row count, ignoring the very newest properly named table.
     *
     * @param dumpFolder folder containing the dump tables
     * @return the found dump table, or empty when there is none
     */
    private Option<YPath> findNewestNotEmptyDumpSkipFirst(YPath dumpFolder) {
        // Newest first; drop(1) skips the newest table itself.
        List<YPath> dumps = listSortedChildNames(dumpFolder)
                .filter(YtDumpBackupManager::hasProperTableNameFormat)
                .sorted().reverse().map(dumpFolder::child).drop(1);
        for (YPath dump : dumps) {
            if (getRowCount(dump) > 0) {
                return Option.of(dump);
            }
        }
        return Option.empty();
    }

    /** Selects the changes tables using the log folder configured in {@link #databaseChangesLogLocationInYt}. */
    private ListF<YPath> getChangesPathsInYt(YPath dumpFolder) {
        return getChangesPathsInYt(dumpFolder, YPath.simple(databaseChangesLogLocationInYt.get()));
    }

    /**
     * Selects the changes log tables to merge.
     * <p>
     * When the manual-merge property lists table names, exactly those tables are returned, restricted to
     * {@link DatabaseChangesTskvFields#ALL_FIELDS}. Otherwise every log table whose name parses to a date
     * that is not before the date of the dump found by {@link #findNewestNotEmptyDumpSkipFirst} is returned;
     * the result is empty when no such dump exists.
     *
     * @param dumpFolder folder containing the dump tables
     * @param logFolder folder containing the changes log tables
     */
    private ListF<YPath> getChangesPathsInYt(YPath dumpFolder, YPath logFolder) {
        // Read the property once so the branch decision and the returned tables come from the same value.
        ListF<String> manualTableNames = databaseChangesTableNamesToManualMerge.get();
        if (!manualTableNames.isEmpty()) {
            return manualTableNames
                    .map(logFolder::child)
                    .map(this::addSpecificColumnsSelection);
        }

        // hack: today's table is created before getChangesPathsInYt is called, so the newest table is useless
        // see ru.yandex.chemodan.app.dataapi.worker.dump.full.YtDumpConsistencyManager.updateSpecificCollection()
        // NOTE(review): unlike the manual branch, no column selection is applied here - confirm this is intended.
        return findNewestNotEmptyDumpSkipFirst(dumpFolder).map(path -> {
            DateTime edge = YT_NODE_NAME_FORMATTER.parseDateTime(path.name());
            return listSortedChildNames(logFolder)
                    .filter(name -> YtDumpBackupManager.getDateTimeFromYtNodeName(name)
                            .map(dateTime -> !dateTime.isBefore(edge)).getOrElse(false))
                    .map(logFolder::child);
        }).getOrElse(Cf.list());
    }

    /**
     * Test-only entry point exposing the changes table selection for an explicit log folder.
     */
    public ListF<YPath> getChangesPathsInYtForTest(YPath dumpFolder, YPath logFolder) {
        return getChangesPathsInYt(dumpFolder, logFolder);
    }

    /** Restricts the given table path to the columns listed in {@link DatabaseChangesTskvFields#ALL_FIELDS}. */
    private YPath addSpecificColumnsSelection(YPath path) {
        return path.withColumns(DatabaseChangesTskvFields.ALL_FIELDS);
    }
}
