package ru.yandex.crypta.graph2.matching.human.workflow.init;

import java.util.List;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import ru.yandex.bolts.collection.Cf;
import ru.yandex.bolts.collection.ListF;
import ru.yandex.bolts.collection.Option;
import ru.yandex.bolts.collection.Tuple2;
import ru.yandex.crypta.graph.soup.config.proto.TEdgeType;
import ru.yandex.crypta.graph2.dao.Dao;
import ru.yandex.crypta.graph2.dao.yt.ops.MapOperation;
import ru.yandex.crypta.graph2.matching.human.paths.ComponentsRawTables;
import ru.yandex.crypta.graph2.matching.human.paths.SoupTables;
import ru.yandex.crypta.graph2.matching.human.strategy.HumanMatchingStrategyProvider;
import ru.yandex.crypta.graph2.matching.human.workflow.HumanMatchingTask;
import ru.yandex.crypta.graph2.matching.human.workflow.init.ops.InitEdgesCryptaIdsAndMergeNeighbours;
import ru.yandex.crypta.graph2.matching.human.workflow.init.ops.InitVertexCryptaIdsAndMergeNeighbours;
import ru.yandex.crypta.graph2.model.id.proto.IdInfo;
import ru.yandex.crypta.graph2.model.matching.proto.EdgeBetweenComponents;
import ru.yandex.crypta.graph2.model.matching.proto.EdgeInComponent;
import ru.yandex.crypta.graph2.model.matching.proto.EdgeProtoHelper;
import ru.yandex.crypta.graph2.model.matching.proto.MergeNeighbour;
import ru.yandex.crypta.graph2.model.matching.proto.VertexInComponent;
import ru.yandex.crypta.graph2.model.soup.proto.Edge;
import ru.yandex.crypta.graph2.soup.workflow.ops.CreateBlankMessagesFromEdges;
import ru.yandex.crypta.graph2.workflow.IterableTables;
import ru.yandex.inside.yt.kosher.common.DataSize;
import ru.yandex.inside.yt.kosher.cypress.YPath;

import static ru.yandex.crypta.graph2.matching.human.config.ConfigParser.parseEdgeTypes;

/**
 * First step of human matching: assigns crypta ids to vertices and soup edges
 * and collects merge-neighbour offers between components.
 *
 * <p>All tables declared below are children of the task work directory; their names are
 * fixed in the constructor.
 */
public class InitComponentsAndNeighboursTask
        extends HumanMatchingTask<InitComponentsAndNeighboursTask.MatchingInputTables,
        InitComponentsAndNeighboursTask.GraphInitTables> {

    private static final Logger LOG = LoggerFactory.getLogger(InitComponentsAndNeighboursTask.class);
    // Blank edge messages built from soup edges when no ready-made messages table can be reused.
    private YPath blankEdgeMessages;

    // Outputs of the InitVertexCryptaIdsAndMergeNeighbours reduce.
    private YPath verticesCryptaIdInit;
    private YPath edgeMessagesSent;
    private YPath mergeByVertexNeighboursTmp;
    private YPath verticesOverlimit;
    private YPath verticesOutdated;
    private YPath sharedEdges;

    // Outputs of the InitEdgesCryptaIdsAndMergeNeighbours reduce.
    private YPath soupEdgesWithCryptaId;
    private YPath soupEdgesBetweenComponentsByMergeKey;
    private YPath mergeByEdgesNeighboursTmp;
    private YPath soupEdgesCcOom;

    // Also written by the vertex reduce; created with the IdInfo schema.
    private YPath verticesPropertiesWithCryptaId;

    // Concatenation of the two *_neighbours_tmp tables above.
    private YPath mergeNeighbours;
    // Settings handed to the InitVertexCryptaIdsAndMergeNeighbours reducer.
    private List<TEdgeType> filterEdgeTypes;
    private InitVertexCryptaIdsAndMergeNeighbours.FilterSoupEdges filterSoupEdges;
    private int edgesPerSourceLimit;


    /**
     * Sets up the table locations under {@code workdir} and the edge filtering settings
     * used by the vertex reducer.
     *
     * @param dao                 access to YT cypress and YT operations
     * @param workdir             directory holding every table of this task
     * @param matchingStrategy    forwarded to the parent task
     * @param filterEdgeTypes     edge type names, converted with {@code parseEdgeTypes}
     * @param edgesPerSourceLimit limit handed to the vertex reducer
     * @param experimentName      forwarded to the parent task
     */
    public InitComponentsAndNeighboursTask(Dao dao, YPath workdir, HumanMatchingStrategyProvider matchingStrategy,
                                           List<String> filterEdgeTypes, int edgesPerSourceLimit,
                                           String experimentName) {
        super(dao, workdir, matchingStrategy, experimentName);

        // --- reducer settings ---
        this.edgesPerSourceLimit = edgesPerSourceLimit;
        this.filterEdgeTypes = parseEdgeTypes(filterEdgeTypes);

        // TODO: remove hardcoded constant
        // nameSuffix is not declared in this class; presumably the parent constructor sets it - verify.
        if ("v2exp".equals(nameSuffix)) {
            filterSoupEdges = InitVertexCryptaIdsAndMergeNeighbours.FilterSoupEdges.EXP;
        } else {
            filterSoupEdges = InitVertexCryptaIdsAndMergeNeighbours.FilterSoupEdges.PROD;
        }

        // --- temporary blank messages ---
        blankEdgeMessages = workdir.child("edge_messages_new");

        // --- vertex reduce outputs ---
        verticesCryptaIdInit = workdir.child("vertices_with_crypta_ids");
        verticesPropertiesWithCryptaId = workdir.child("vertices_properties_crypta_id");
        edgeMessagesSent = workdir.child("edge_messages_sent");
        mergeByVertexNeighboursTmp = workdir.child("merge_by_vertex_neighbours_tmp");
        verticesOverlimit = workdir.child("vertices_overlimit");
        verticesOutdated = workdir.child("vertices_outdated");
        sharedEdges = workdir.child("shared_edges");

        // --- edge reduce outputs ---
        soupEdgesWithCryptaId = workdir.child("soup_edges_crypta_id");
        soupEdgesBetweenComponentsByMergeKey = workdir.child("soup_edges_between_components_by_merge_key");
        mergeByEdgesNeighboursTmp = workdir.child("merge_by_edges_neighbours_tmp");
        soupEdgesCcOom = workdir.child("soup_edges_crypta_id_star_overlimit");

        // --- final merge offers ---
        mergeNeighbours = workdir.child("merge_neighbours");
    }

    /**
     * Maps soup edges into blank edge messages and sorts the result by the vertex reduce key.
     *
     * @param soupEdgesTable table with soup edges to read
     * @return path of the sorted blank edge messages table
     */
    private YPath createBlankEdgeMessages(YPath soupEdgesTable) {
        LOG.info("Mapping edge messages");
        dao.ytCypress().createTableWithSchema(blankEdgeMessages,
                ru.yandex.crypta.graph2.model.matching.proto.CryptaIdEdgeMessage.class);

        // Read every Edge column except the dates column.
        List<String> edgeColumnsWithoutDates = dao.ytCypress()
                .extractTableSchema(Edge.class)
                .getColumnNames()
                .stream()
                .filter(column -> !column.equals(EdgeProtoHelper.DATES_COLUMN))
                .collect(Collectors.toList());
        YPath edgesInput = soupEdgesTable.withColumns(Cf.toList(edgeColumnsWithoutDates));

        MapOperation mapToMessages = dao.ytOps().mapOperation(
                Cf.list(edgesInput),
                Cf.list(blankEdgeMessages),
                new CreateBlankMessagesFromEdges()
        );
        mapToMessages.getMapSpecBuilder().setDataSizePerJob(DataSize.fromGigaBytes(1));
        mapToMessages.runSync();

        // Sort in place so the table can be fed to the vertex reduce.
        dao.ytOps()
                .sortOperation(
                        Cf.list(blankEdgeMessages),
                        blankEdgeMessages,
                        InitVertexCryptaIdsAndMergeNeighbours.VERTEX_REDUCE_KEY)
                .runSync();

        return blankEdgeMessages;
    }

    /**
     * Runs the InitVertexCryptaIdsAndMergeNeighbours reduce over the previous-iteration vertices,
     * the vertices properties and the blank edge messages.
     *
     * @param verticesPrevIterTable vertices table of the previous iteration, if there is one
     * @param allVPTable            vertices properties table (expected to be sorted)
     * @param blankEdgeMessages     blank edge messages table (expected to be sorted)
     * @return pair of (vertices with crypta ids, sent edge messages)
     */
    private Tuple2<YPath, YPath> initVerticesCryptaIdsAndMergeKeys(Option<YPath> verticesPrevIterTable,
                                                                   YPath allVPTable,
                                                                   YPath blankEdgeMessages) {
        YPath prevVertices;
        if (!verticesPrevIterTable.isPresent()) {
            // stupid hack: creating fake table to ensure reduce table ordering
            prevVertices = workdir.child("some_fake_path");
            boolean fakeTableExists = dao.yt().cypress().exists(prevVertices);
            if (!fakeTableExists) {
                dao.ytCypress().createTableWithSchema(prevVertices, VertexInComponent.class);
            }
            dao.ytOps().sortSync(prevVertices, InitVertexCryptaIdsAndMergeNeighbours.VERTEX_REDUCE_KEY);
        } else {
            prevVertices = verticesPrevIterTable.get();
        }

        LOG.info("Preparing for InitVertexCryptaIdsAndMergeNeighbours");
        // All three inputs (prevVertices, allVPTable, blankEdgeMessages) are assumed to be sorted already.

        dao.ytCypress().createTableWithSchema(verticesCryptaIdInit, VertexInComponent.class);
        dao.ytCypress().createTableWithSchema(edgeMessagesSent,
                ru.yandex.crypta.graph2.model.matching.proto.CryptaIdEdgeMessage.class);
        dao.ytCypress().createTableWithSchema(mergeByVertexNeighboursTmp, MergeNeighbour.class);
        dao.ytCypress().createTableWithSchema(verticesOverlimit,
                ru.yandex.crypta.graph2.model.matching.proto.VertexOverlimit.class);
        dao.ytCypress().createTableWithSchema(verticesOutdated,
                ru.yandex.crypta.graph2.model.matching.proto.VertexKey.class);
        dao.ytCypress().createTableWithSchema(sharedEdges,
                ru.yandex.crypta.graph2.model.matching.proto.CryptaIdEdgeMessage.class);
        dao.ytCypress().createTableWithSchema(verticesPropertiesWithCryptaId, IdInfo.class);

        // Restrict the first two inputs to the columns of their schemas.
        List<String> vertexColumns = dao.ytCypress().extractTableSchema(VertexInComponent.class).getColumnNames();
        List<String> propertiesColumns = dao.ytCypress().extractTableSchema(IdInfo.class).getColumnNames();

        LOG.info("Running InitVertexCryptaIdsAndMergeNeighbours ");
        ListF<YPath> reduceInputs = Cf.list(
                prevVertices.withColumns(Cf.wrap(vertexColumns)),
                allVPTable.withColumns(Cf.wrap(propertiesColumns)),
                blankEdgeMessages
        );
        ListF<YPath> reduceOutputs = Cf.list(
                verticesCryptaIdInit,
                verticesPropertiesWithCryptaId,
                edgeMessagesSent,
                mergeByVertexNeighboursTmp,
                verticesOverlimit,
                verticesOutdated,
                sharedEdges
        );
        dao.ytOps().reduceSync(
                reduceInputs,
                reduceOutputs,
                InitVertexCryptaIdsAndMergeNeighbours.VERTEX_REDUCE_KEY,
                new InitVertexCryptaIdsAndMergeNeighbours(edgesPerSourceLimit, filterEdgeTypes, filterSoupEdges)
        );

        // to merge chunks
        ListF<YPath> tablesToCompact = Cf.list(verticesOverlimit, verticesOutdated);
        dao.ytOps().sortAllParallel(tablesToCompact, InitVertexCryptaIdsAndMergeNeighbours.VERTEX_REDUCE_KEY);

        return Tuple2.tuple(verticesCryptaIdInit, edgeMessagesSent);
    }

    /**
     * Sorts the edge messages by the multi-edge unique key and runs the
     * InitEdgesCryptaIdsAndMergeNeighbours reduce over them.
     *
     * @param edgeMessagesWithCryptaIds edge messages table; it is sorted in place before the reduce
     */
    private void initSoupEdgesCryptaIdsAndMergeKeys(YPath edgeMessagesWithCryptaIds) {
        dao.ytCypress().ensureDir(workdir);

        LOG.info("Preparing for InitEdgesCryptaIdsAndMergeNeighbours: sorting");
        // Only the messages are sorted here; the soup edges table is assumed to be sorted already.
        ListF<YPath> tablesToSort = Cf.list(edgeMessagesWithCryptaIds);
        dao.ytOps().sortAllParallel(tablesToSort, InitEdgesCryptaIdsAndMergeNeighbours.MULTI_EDGE_UNIQUE_KEY);

        LOG.info("Running InitEdgesCryptaIdsAndMergeNeighbours");
        dao.ytCypress().createTableWithSchema(soupEdgesWithCryptaId, EdgeInComponent.class);
        dao.ytCypress().createTableWithSchema(soupEdgesBetweenComponentsByMergeKey, EdgeBetweenComponents.class);
        dao.ytCypress().createTableWithSchema(mergeByEdgesNeighboursTmp, MergeNeighbour.class);

        ListF<YPath> reduceInputs = Cf.list(edgeMessagesWithCryptaIds);
        // NOTE(review): soupEdgesCcOom is the only output without an explicit schema here - confirm intended.
        ListF<YPath> reduceOutputs = Cf.list(
                soupEdgesWithCryptaId,
                soupEdgesBetweenComponentsByMergeKey,
                mergeByEdgesNeighboursTmp,
                soupEdgesCcOom
        );
        dao.ytOps().reduceSync(
                reduceInputs,
                reduceOutputs,
                InitEdgesCryptaIdsAndMergeNeighbours.MULTI_EDGE_UNIQUE_KEY,
                new InitEdgesCryptaIdsAndMergeNeighbours(params.getEdgeInfoProvider(), params.getMergeType())
        );
    }

    /**
     * Runs the whole init step: obtains edge messages, initialises vertices and soup edges
     * with crypta ids and concatenates both merge-neighbour tables into one.
     *
     * <p>Edge messages are taken from {@code soupTables.mergedEdgeMessages} when that path is
     * set and the table exists; otherwise they are built from the merged soup edges into a
     * temporary table, which is removed at the end of the run.
     *
     * @param matchingInputTables soup tables plus the optional previous-iteration vertices
     */
    @Override
    protected void runImpl(MatchingInputTables matchingInputTables) {
        dao.ytCypress().ensureDir(workdir);

        SoupTables soupTables = matchingInputTables.soupTables;

        // Reuse the supplied messages only when the table is really there.
        boolean reuseSuppliedMessages = soupTables.mergedEdgeMessages.isPresent()
                && dao.yt().cypress().exists(soupTables.mergedEdgeMessages.get());
        YPath edgeMessagesIn = reuseSuppliedMessages
                ? soupTables.mergedEdgeMessages.get()
                : createBlankEdgeMessages(soupTables.mergedSoupEdges);

        // First reduce is aimed to mark all new edges and also take into account components from previous iteration
        Tuple2<YPath, YPath> initVerticesResult = initVerticesCryptaIdsAndMergeKeys(
                matchingInputTables.prevIterationMatching,
                soupTables.mergedVerticesProperties,
                edgeMessagesIn
        );

        YPath edgeMessagesCryptaIdInit = initVerticesResult._2;

        initSoupEdgesCryptaIdsAndMergeKeys(edgeMessagesCryptaIdInit);

        // Glue vertex-based and edge-based merge offers together, then drop the parts.
        ListF<YPath> mergeNeighboursTmp = Cf.list(this.mergeByVertexNeighboursTmp, this.mergeByEdgesNeighboursTmp);
        dao.ytCypress().createTableWithSchema(this.mergeNeighbours, MergeNeighbour.class);
        dao.yt().cypress().concatenate(
                mergeNeighboursTmp,
                this.mergeNeighbours
        );
        dao.ytCypress().removeAll(mergeNeighboursTmp);

        // Cleanup follows "we created it", not "no path was supplied": a supplied but missing
        // path also makes this run build the temporary table, and it must not be left behind.
        if (!reuseSuppliedMessages) {
            // temporary table
            dao.yt().cypress().remove(edgeMessagesIn);
        }
    }

    /**
     * Bundles the tables produced by this task.
     *
     * @return vertices, vertices properties and soup edges with crypta ids, together with
     *         the merge neighbours and the edges between components
     */
    @Override
    public GraphInitTables getOutput() {
        return new GraphInitTables(
                new ComponentsRawTables(
                        verticesCryptaIdInit,
                        verticesPropertiesWithCryptaId,
                        soupEdgesWithCryptaId
                ),
                mergeNeighbours,
                soupEdgesBetweenComponentsByMergeKey
        );
    }

    /** Returns a short human-readable summary of this task. */
    @Override
    public String getDescription() {
        final String summary = "Inits all vertices and edges with crypta id info. Makes merge offers";
        return summary;
    }

    /** Input bundle of the task: soup tables and, optionally, the previous iteration's vertices. */
    public static class MatchingInputTables {
        /** Soup tables: edges, vertices properties and optional ready-made edge messages. */
        public SoupTables soupTables;
        /** Vertices table left by the previous iteration; empty when there is none. */
        public Option<YPath> prevIterationMatching;

        /**
         * @param soupTables            soup tables to read
         * @param prevIterationMatching vertices table of the previous iteration, if any
         */
        public MatchingInputTables(SoupTables soupTables, Option<YPath> prevIterationMatching) {
            this.prevIterationMatching = prevIterationMatching;
            this.soupTables = soupTables;
        }
    }

    /** Output bundle of the task: component tables plus merge offers and cross-component edges. */
    public static class GraphInitTables extends IterableTables {
        /** Vertices, vertices properties and edges carrying crypta ids. */
        public ComponentsRawTables graphTables;
        /** Merge offers between components. */
        public YPath mergeNeighbours;
        /** Edges connecting different components. */
        public YPath edgesBetweenComponentsMergeKey;

        /**
         * @param graphTables                    component tables
         * @param mergeNeighbours                merge neighbours table
         * @param edgesBetweenComponentsMergeKey edges between components table
         */
        public GraphInitTables(ComponentsRawTables graphTables, YPath mergeNeighbours,
                               YPath edgesBetweenComponentsMergeKey) {
            this.edgesBetweenComponentsMergeKey = edgesBetweenComponentsMergeKey;
            this.mergeNeighbours = mergeNeighbours;
            this.graphTables = graphTables;
        }

        /** Lists this bundle's own two tables first, followed by every table of {@code graphTables}. */
        @Override
        public ListF<YPath> allTables() {
            ListF<YPath> ownTables = Cf.list(mergeNeighbours, edgesBetweenComponentsMergeKey);
            return ownTables.plus(graphTables.allTables());
        }
    }

}
