package ru.yandex.crypta.graph2.soup.workflow;

import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ru.yandex.bolts.collection.Cf;
import ru.yandex.bolts.collection.ListF;
import ru.yandex.bolts.collection.MapF;
import ru.yandex.bolts.collection.Option;
import ru.yandex.crypta.graph.soup.config.Soup;
import ru.yandex.crypta.graph2.dao.Dao;
import ru.yandex.crypta.graph2.dao.yt.ops.Await;
import ru.yandex.crypta.graph2.dao.yt.ops.MapOperation;
import ru.yandex.crypta.graph2.model.soup.edge.Edge;
import ru.yandex.crypta.graph2.model.soup.edge.EdgeProtoHelper;
import ru.yandex.crypta.graph2.model.soup.edge.weight.estimator.EdgeWeightEstimator;
import ru.yandex.crypta.graph2.model.soup.edge.weight.estimator.NormalizedDatesActivityEdgeWeightEstimator;
import ru.yandex.crypta.graph2.model.soup.edge.weight.estimator.SurvivalEdgeWeightEstimator;
import ru.yandex.crypta.graph2.model.soup.sources.DefaultEdgeTypeConfigProvider;
import ru.yandex.crypta.graph2.soup.config.SoupAndStorageProcessingParams;
import ru.yandex.crypta.graph2.soup.workflow.ops.CreateBlankMessagesFromEdges;
import ru.yandex.crypta.graph2.soup.workflow.ops.PrepareSoupEdges;
import ru.yandex.crypta.graph2.workflow.EmptyInput;
import ru.yandex.crypta.graph2.workflow.Task;
import ru.yandex.inside.yt.kosher.common.DataSize;
import ru.yandex.inside.yt.kosher.common.GUID;
import ru.yandex.inside.yt.kosher.cypress.YPath;
import ru.yandex.inside.yt.kosher.ytree.YTreeNode;

import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.joining;
import static ru.yandex.crypta.graph2.model.soup.edge.EdgeProtoHelper.getTableName;


public class PrepareSoupTask extends Task<EmptyInput, ListF<YPath>, SoupAndStorageProcessingParams> {

    private static final Logger LOG = LoggerFactory.getLogger(PrepareSoupTask.class);
    public static final String ID_COLUMN = "id";
    public static final String ID_TYPE_COLUMN = "id_type";
    public static final ListF<String> VERTEX_REDUCE_KEY = Cf.list(ID_COLUMN, ID_TYPE_COLUMN);

    private YPath todayEdgesMerged;
    private YPath todayEdgesDecayed;
    private YPath edgeMessages;
    private final PrepareSoupWorkflow.ProcessingDates processingDates;

    public PrepareSoupTask(Dao dao, YPath workdir, SoupAndStorageProcessingParams matchingParams,
                           PrepareSoupWorkflow.ProcessingDates processingDates) {
        super(dao, workdir, matchingParams);
        this.processingDates = processingDates;
        this.todayEdgesMerged = workdir.child("soup_edges");
        this.todayEdgesDecayed = workdir.child("soup_edges_decayed");
        this.edgeMessages = workdir.child("edge_messages");
    }

    @Override
    public void runImpl(EmptyInput soupDir) {

        List<YPath> inputTables;
        if (params.getSoupDir().isPresent()) {
            inputTables = findNotEmptySoupTables();
        } else {
            inputTables = params.getSoupTable();
        }

        dao.ytCypress().ensureDir(workdir);

        dao.ytTr().withTransactionAndTmpDir((tx, tmpDir) -> {

            LOG.info("Merging all soup tables to single edge table");
            Option<GUID> txId = Option.of(tx.getId());

            PrepareSoupEdges mapper = createMapper(inputTables.size());

            YPath todayEdgesTmp = dao.ytCypress().createTableWithSchema(
                    txId, tmpDir.child("soup_edges_merged_tmp1"), Edge.class
            );

            YPath todayEdgesDecayedTmp = dao.ytCypress().createTableWithSchema(
                    txId, tmpDir.child("soup_edges_decayed_tmp"), Edge.class
            );

            MapOperation op = dao.ytOps().mapOperation(
                    Cf.x(inputTables),
                    Cf.list(todayEdgesTmp, todayEdgesDecayedTmp),
                    mapper
            );
            // double size of output causes job slowdown
            // make jobs smaller
            op.getMapSpecBuilder().setDataSizePerJob(DataSize.fromGigaBytes(1));
            op.runSync(txId);

            MapF<YPath, YPath> tablesToProcess = Cf.hashMap();
            tablesToProcess.put(todayEdgesMerged, todayEdgesTmp);
            tablesToProcess.put(todayEdgesDecayed, todayEdgesDecayedTmp);

            Await.all(
                    tablesToProcess.values().map( tempTable ->
                        dao.ytOps().sortAsync(
                                txId, Cf.list(tempTable), tempTable,
                                Edge.EDGE_UNIQUE_KEY
                    ))
            );

            tablesToProcess.forEach(
                    (destTable, tempTable) -> {
                        dao.ytCypress().move(txId, tempTable, destTable);
                        dao.ytCypress().remove(txId, tempTable);
                        processingDates.setDates(dao, txId, destTable);
                    }
            );
        });
        createBlankEdgeMessages(todayEdgesMerged);
    }

    private PrepareSoupEdges createMapper(int soupTablesCount) {
        DefaultEdgeTypeConfigProvider edgeTypeConfigProvider = new DefaultEdgeTypeConfigProvider();

        LOG.info("Maybe searching native libs in " + System.getProperty("java.library.path"));
        LOG.info("Init SurvivalEdgeWeightEstimator from " + params.getSoupSurvivalModelTable());
        EdgeWeightEstimator datesEdgeWeightEstimator = NormalizedDatesActivityEdgeWeightEstimator.buildFromYt(
                dao,
                params.getSoupStatsTable().flatMapO(dao.ytCypress()::checkExistence),
                edgeTypeConfigProvider
        );
        EdgeWeightEstimator survivalEdgeWeightEstimator = SurvivalEdgeWeightEstimator.buildFromYt(
                dao,
                params.getSoupSurvivalModelTable().flatMapO(dao.ytCypress()::checkExistence),
                edgeTypeConfigProvider
        );
        return new PrepareSoupEdges(
                edgeTypeConfigProvider,
                datesEdgeWeightEstimator,
                survivalEdgeWeightEstimator,
                processingDates.soupGenerateDate,
                soupTablesCount
        );
    }

    private List<YPath> findNotEmptySoupTables() {
        // raw edges from soup with all-time history
        var tablesByExistence = Soup.CONFIG.getEdgeTypes()
                .stream()
                .filter(
                        et -> Soup.CONFIG.getEdgeUsage(et).getHumanMatching() ||
                                Soup.CONFIG.getEdgeUsage(et).getHumanMatchingExp() ||
                                Soup.CONFIG.getEdgeUsage(et).getSoupUpdate())
                .map(et -> params.getSoupDir().get().child(getTableName(et)))
                .collect(Collectors.groupingBy(p -> dao.yt().cypress().exists(p)));

        List<YPath> existingSoupTables = tablesByExistence.getOrDefault(true, List.of());
        List<YPath> notFoundAllSoupTables = tablesByExistence.getOrDefault(false, List.of());

        LOG.info(
                "Today soup tables:\n{}",
                existingSoupTables.stream().map(Objects::toString).collect(joining("\n"))
        );
        if (!notFoundAllSoupTables.isEmpty()) {
            LOG.warn(
                    "Not found expected soup tables, Check EdgeType configs:\n{}",
                    notFoundAllSoupTables.stream().map(Objects::toString).collect(joining("\n"))
            );
        }

        var tablesByEmptiness = existingSoupTables.stream().collect(groupingBy(t -> dao.ytCypress().isEmpty(t)));
        List<YPath> emptyTables = tablesByEmptiness.getOrDefault(true, List.of());
        List<YPath> notEmptyTables = tablesByEmptiness.getOrDefault(false, List.of());

        if (!emptyTables.isEmpty()) {
            LOG.warn(
                    "Some soup tables are empty!\n{}.",
                    emptyTables.stream().map(Objects::toString).collect(joining("\n"))
            );
        }
        for (YPath table : notEmptyTables) {
            // just debug for some weird behavior in tests
            YTreeNode schemaAttr = dao.yt().cypress().get(table.attribute("schema"));
            LOG.info("Schema of soup table {} is {}", table, schemaAttr);
        }
        return notEmptyTables;
    }

    private YPath createBlankEdgeMessages(YPath soupEdgesTable) {

        dao.ytTr().withTransactionAndTmpDir((tx, tmpDir) -> {
            LOG.info("Mapping edge messages");
            Option<GUID> txId = Option.of(tx.getId());

            YPath edgeMessagesTmp = dao.ytCypress().createTableWithSchema(
                    txId, tmpDir.child("edge_messages_tmp"),
                    ru.yandex.crypta.graph2.model.matching.proto.CryptaIdEdgeMessage.class
            );

            List<String> columnNames = dao
                    .ytCypress()
                    .extractTableSchema(ru.yandex.crypta.graph2.model.soup.proto.Edge.class)
                    .getColumnNames()
                    .stream()
                    .filter(c -> !c.equals(EdgeProtoHelper.DATES_COLUMN))
                    .collect(Collectors.toList());

            MapOperation op = dao.ytOps().mapOperation(
                    Cf.list(soupEdgesTable.withColumns(Cf.toList(columnNames))),
                    Cf.list(edgeMessagesTmp),
                    new CreateBlankMessagesFromEdges()
            );
            // double size of output causes job slowdown
            // make jobs smaller
            op.getMapSpecBuilder().setDataSizePerJob(DataSize.fromGigaBytes(1));
            op.runSync(txId);

            dao.ytOps().sortOperation(
                    Cf.list(edgeMessagesTmp),
                    edgeMessagesTmp,
                    VERTEX_REDUCE_KEY
            ).runSync(txId);
            dao.ytCypress().move(txId, edgeMessagesTmp, edgeMessages);
            dao.ytCypress().remove(txId, edgeMessagesTmp);

            processingDates.setDates(dao, txId, edgeMessages);
        });
        return edgeMessages;
    }

    @Override
    public ListF<YPath> getOutput() {
        if (params.getSoupTable().isPresent()) {
            return Cf.list(params.getSoupTable().get(), edgeMessages);
        } else {
            return Cf.list(todayEdgesMerged, edgeMessages);
        }
    }

    @Override
    public String getDescription() {
        return "Prepares all soup tables for matching";
    }
}
