import scipy.sparse
import sklearn.manifold
import sklearn.cluster

import collections


def make_graph(matches, feature_object_ids, min_confidence):
    """Build an undirected, weighted adjacency-list graph from pairwise matches.

    Args:
        matches: Iterable of ``(id_1, id_2, confidence)`` triples.
        feature_object_ids: Iterable of node ids; each becomes a key of the
            result, even when no match refers to it.
        min_confidence: Matches for which ``min_confidence > confidence`` are
            dropped.

    Returns:
        A dict mapping each node id to a list of ``(neighbour, confidence)``
        tuples. Every kept match is recorded on both of its endpoints.

    Raises:
        KeyError: If a kept match refers to an id that is not listed in
            ``feature_object_ids``.
    """
    adjacency = {}
    for foid in feature_object_ids:
        adjacency[foid] = []

    for first, second, confidence in matches:
        if not min_confidence > confidence:
            # Store the edge in both directions so the graph is undirected.
            adjacency[first].append((second, confidence))
            adjacency[second].append((first, confidence))

    return adjacency


def make_subgraph(graph, nodes):
    """Return the subgraph of *graph* induced by *nodes*.

    Args:
        graph: Dict mapping a node to a list of ``(neighbour, confidence)``
            tuples.
        nodes: Iterable of nodes to keep; duplicates are collapsed.

    Returns:
        A new dict holding only the requested nodes, where each edge list is
        reduced to the edges whose neighbour is also one of the kept nodes.

    Raises:
        KeyError: If one of *nodes* is not a key of *graph*.
    """
    kept = set(nodes)

    subgraph = {}
    for node in kept:
        subgraph[node] = [
            (neighbour, conf)
            for neighbour, conf in graph[node]
            if neighbour in kept
        ]

    return subgraph


def define_components(graph):
    """Split *graph* into its connected components.

    Args:
        graph: Dict mapping a node to a list of ``(neighbour, confidence)``
            tuples. Every neighbour must itself be a key of *graph*.

    Returns:
        A list of components, each a list of nodes. Components appear in the
        order their first node is met while iterating *graph*; inside a
        component the nodes are in breadth-first discovery order.

    Raises:
        KeyError: If a neighbour is not a key of *graph*.
    """
    visited = set()
    components = []

    for start in graph:
        if start in visited:
            continue

        visited.add(start)
        members = [start]
        # A deque gives O(1) pops from the left; popping the head of a list
        # is O(n) and makes the traversal quadratic on large components.
        queue = collections.deque([start])

        while queue:
            current = queue.popleft()

            for neighbour, _conf in graph[current]:
                # Mark on discovery so a node is never queued twice.
                if neighbour not in visited:
                    visited.add(neighbour)
                    members.append(neighbour)
                    queue.append(neighbour)

        components.append(members)

    return components


class Index:
    """Assign consecutive integer positions to feature object ids.

    Calling the instance with an id returns its position, allocating the next
    free one the first time an id is seen. ``feature_object_id`` performs the
    reverse lookup.
    """

    def __init__(self):
        # id -> position
        self.index = {}
        # position -> id
        self.listing = []

    def __call__(self, feature_object_id):
        """Return the position of *feature_object_id*, registering it if new."""
        position = self.index.get(feature_object_id)

        if position is None:
            # First sighting: hand out the next unused position.
            position = len(self.index)
            self.index[feature_object_id] = position
            self.listing.append(feature_object_id)

        return position

    def feature_object_id(self, index):
        """Return the id that was registered at position *index*."""
        return self.listing[index]


def make_adjacency_matrix(graph):
    """Convert an adjacency-list graph into a sparse weighted adjacency matrix.

    Args:
        graph: Dict mapping a node to a list of ``(neighbour, confidence)``
            tuples.

    Returns:
        A ``(matrix, index)`` pair. ``matrix`` is a square
        ``scipy.sparse.coo_matrix`` holding one entry per listed edge, with
        the confidence as its value. ``index`` is the ``Index`` that maps
        nodes to their row/column position and back.
    """
    index = Index()

    rows, cols = [], []
    values = []

    for node, edges in graph.items():
        for neighbour, conf in edges:
            rows.append(index(node))
            cols.append(index(neighbour))
            values.append(conf)

    # Nodes without edges were not touched by the loop above. Register them
    # afterwards so they still get an (all-zero) row and column instead of
    # silently disappearing; doing it last keeps the positions of nodes that
    # do have edges unchanged.
    for node in graph:
        index(node)

    # An explicit shape keeps the matrix square and covering every registered
    # node, rather than relying on the largest index that happens to occur.
    size = len(index.listing)
    adjacency = scipy.sparse.coo_matrix((values, (rows, cols)), shape=(size, size))

    return adjacency, index


def find_clusters(graph, embedding_size=3):
    """Cluster the nodes of *graph* using a spectral embedding and mean shift.

    The graph is turned into a sparse adjacency matrix, which is passed to
    ``SpectralEmbedding`` as a precomputed affinity; the embedded points are
    then labelled by ``MeanShift``.

    Args:
        graph: Dict mapping a node to a list of ``(neighbour, confidence)``
            tuples. Must contain more than ``embedding_size`` nodes.
        embedding_size: Number of embedding components to compute.

    Returns:
        A list of clusters, each a list of nodes, ordered by the first
        appearance of each cluster label.
    """
    assert len(graph) > embedding_size

    adjacency, index = make_adjacency_matrix(graph)

    embedder = sklearn.manifold.SpectralEmbedding(
        n_components=embedding_size,
        affinity='precomputed',
    )
    points = embedder.fit_transform(adjacency)
    labels = sklearn.cluster.MeanShift().fit_predict(points)

    # Row i of the embedding belongs to the node registered at position i.
    members_by_label = {}
    for position, label in enumerate(labels):
        members_by_label.setdefault(label, []).append(index.feature_object_id(position))

    return list(members_by_label.values())


def spectral(matches, feature_object_ids, min_confidence, embedding_size=3, min_component_size=10):
    """Group feature objects into clusters based on their pairwise matches.

    A graph is built from the matches that pass ``min_confidence`` and split
    into connected components. Components that are large enough are divided
    further with ``find_clusters``; smaller ones are kept whole.

    Args:
        matches: Iterable of ``(id_1, id_2, confidence)`` triples.
        feature_object_ids: Iterable of all ids to be clustered.
        min_confidence: Matches below this confidence are ignored.
        embedding_size: Number of spectral embedding components.
        min_component_size: Smallest component size that is split further;
            the effective threshold is never below ``embedding_size + 2``.

    Returns:
        A list of clusters, each a list of feature object ids.
    """
    full_graph = make_graph(matches, feature_object_ids, min_confidence)

    clusters = []
    for members in define_components(full_graph):
        if len(members) < max(min_component_size, embedding_size + 2):
            # Too small to embed meaningfully: keep the component as one cluster.
            clusters.append(members)
            continue

        component_graph = make_subgraph(full_graph, members)
        clusters.extend(find_clusters(component_graph, embedding_size))

    return clusters
