Source code for nipy.algorithms.graph.graph

# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*-
# vi: set ft=python sts=4 ts=4 sw=4 et:
"""
This module implements two graph classes:

Graph: basic topological graph, i.e. vertices and edges. This kind of
object only has topological properties

WeightedGraph (Graph): also has a value associated with edges, called
weights, that are used in some computational procedures (e.g. path
length computation).  Importantly these objects are equivalent to
square sparse matrices, which is used to perform certain computations.

This module also provides several functions to
instantiate WeightedGraphs from data:
- k nearest neighbours (where samples are rows of a 2D-array)
- epsilon-neighbors (where sample rows of a 2D-array)
- representation of the neighbors on a 3d grid (6-, 18- and 26-neighbors)
- Minimum Spanning Tree (where samples are rows of a 2D-array)

Author: Bertrand Thirion, 2006--2011
"""
from __future__ import absolute_import

import numpy as np

from scipy.sparse import coo_matrix

[docs]class Graph(object): """ Basic topological (non-weighted) directed Graph class Member variables: * V (int > 0): the number of vertices * E (int >= 0): the number of edges Properties: * vertices (list, type=int, shape=(V,)) vertices id * edges (list, type=int, shape=(E,2)): edges as vertices id tuples """ ### Constructor
[docs] def __init__(self, V, E=0, edges=None): """ Constructor Parameters ---------- V : int the number of vertices E : int, optional the number of edges edges : None or shape (E, 2) array, optional edges of graph """ # deal with vertices self.__set_V(V) self.vertices = np.arange(self.V) # deal with edges if not isinstance(edges, None.__class__): self.__set_E(np.shape(edges)[0]) self.set_edges(edges) else: self.__set_E(E) self.set_edges(np.zeros((self.E, 2), dtype=int))
### Accessors
[docs] def get_vertices(self): """ To get the graph's vertices (as id) """ return self.vertices
[docs] def get_edges(self): """To get the graph's edges """ try: temp = self.edges except: temp = [] return temp
[docs] def get_V(self): """To get the number of vertices in the graph """ return self.V
[docs] def get_E(self): """To get the number of edges in the graph """ return self.E
### Mutators def __set_V(self, V): """ Sets the graph's number of vertices. This methods is defined as private since we don't want the number of vertices to be modified outside the graph object methods. """ self.V = int(V) if self.V < 1: raise ValueError('Empty graphs cannot be created') def __set_E(self, E): """Sets the graph's number of edges. This methods is defined as private since we don't want the number of edges to be modified outside the graph object methods. """ self.E = int(E) if self.E < 0: self.E = 0
[docs] def set_edges(self, edges): """Sets the graph's edges Preconditions: * edges has a correct size * edges take values in [1..V] """ if (not isinstance(edges, None.__class__) and (edges.size != 0)): if ((np.shape(edges)[0] != self.E) or (np.shape(edges)[1] != 2)): raise ValueError('Incompatible size of the edge matrix') if edges.max() + 1 > self.V: raise ValueError('Incorrect edge specification') self.edges = edges else: self.edges = []
### Methods
[docs] def adjacency(self): """returns the adjacency matrix of the graph as a sparse coo matrix Returns ------- adj: scipy.sparse matrix instance, that encodes the adjacency matrix of self """ if self.E > 0: i = self.edges[:, 0] j = self.edges[:, 1] adj = coo_matrix((np.ones(self.E), (i, j)), shape=(self.V, self.V)) else: adj = coo_matrix((self.V, self.V)) return adj
[docs] def cc(self): """Compte the different connected components of the graph. Returns ------- label: array of shape(self.V), labelling of the vertices """ try: from scipy.sparse import cs_graph_components _, label = cs_graph_components(self.adjacency()) except: pass lil = self.to_coo_matrix().tolil().rows.tolist() label = lil_cc(lil) return label
[docs] def degrees(self): """Returns the degree of the graph vertices. Returns ------- rdegree: (array, type=int, shape=(self.V,)), the right degrees ldegree: (array, type=int, shape=(self.V,)), the left degrees """ A = self.adjacency() A.data = np.ones(A.nnz) right = np.array(A.sum(1)).ravel() left = np.array(A.sum(0)).ravel() return right, left
[docs] def main_cc(self): """Returns the indexes of the vertices within the main cc Returns ------- idx: array of shape (sizeof main cc) """ if self.E > 0: cc = self.cc() pop = np.array([np.sum(cc == k) for k in np.unique(cc)]) idx = np.nonzero(cc == pop.argmax())[0] else: idx = 0 return idx
[docs] def to_coo_matrix(self): """ Return adjacency matrix as coo sparse Returns ------- sp: scipy.sparse matrix instance, that encodes the adjacency matrix of self """ if self.E > 0: i, j = self.edges.T sm = coo_matrix((np.ones(self.E), (i, j)), shape=(self.V, self.V)) else: sm = coo_matrix((self.V, self.V)) return sm
[docs] def show(self, ax=None): """Shows the graph as a planar one. Parameters ---------- ax, axis handle Returns ------- ax, axis handle """ import matplotlib.pylab as plt if ax is None: plt.figure() ax = plt.subplot(1, 1, 1) t = (2 * np.pi * np.arange(self.V)) / self.V plt.plot(np.cos(t), np.sin(t), '.') planar_edges = np.ravel((self.edges * 2 * np.pi) / self.V) ax.plot(np.cos(planar_edges), np.sin(planar_edges), 'k') ax.axis('off') return ax
##################################################################### # WeightedGraph #####################################################################
[docs]def wgraph_from_coo_matrix(x): """ Instantiates a weighted graph from a (sparse) coo_matrix Parameters ---------- x: scipy.sparse.coo_matrix instance, the input matrix Returns ------- wg: WeightedGraph instance """ if x.shape[0] != x.shape[1]: raise ValueError("the input coo_matrix is not square") i, j = x.nonzero() edges = np.vstack((i, j)).T weights = x.data wg = WeightedGraph(x.shape[0], edges, weights) return wg
[docs]def wgraph_from_adjacency(x): """Instantiates a weighted graph from a square 2D array Parameters ---------- x: 2D array instance, the input array Returns ------- wg: WeightedGraph instance """ a = coo_matrix(x) return wgraph_from_coo_matrix(a)
[docs]def complete_graph(n): """ returns a complete graph with n vertices """ return wgraph_from_adjacency(np.ones((n, n)))
[docs]def mst(X): """ Returns the WeightedGraph that is the minimum Spanning Tree of X Parameters ---------- X: data array, of shape(n_samples, n_features) Returns ------- the corresponding WeightedGraph instance """ n = X.shape[0] label = np.arange(n).astype(np.int) edges = np.zeros((0, 2)).astype(np.int) # upper bound on maxdist**2 maxdist = 4 * np.sum((X - X[0]) ** 2, 1).max() nbcc = n while nbcc > 1: mindist = maxdist * np.ones(nbcc) link = - np.ones((nbcc, 2)).astype(np.int) # find nearest neighbors for n1 in range(n): j = label[n1] newdist = np.sum((X[n1] - X) ** 2, 1) newdist[label == j] = maxdist n2 = np.argmin(newdist) if newdist[n2] < mindist[j]: mindist[j] = newdist[n2] link[j] = np.array([n1, n2]) # merge nearest neighbors nnbcc = nbcc idx = np.arange(nbcc) for i in range(nnbcc): k, j = label[link[i]] while k > idx[k]: k = idx[k] while j > idx[j]: j = idx[j] if k != j: edges = np.vstack((edges, link[i], np.array([link[i, 1], link[i, 0]]))) idx[max(j, k)] = min(j, k) nbcc -= 1 # relabel the graph label = WeightedGraph(n, edges, np.ones(edges.shape[0])).cc() nbcc = label.max() + 1 d = np.sqrt(np.sum((X[edges[:, 0]] - X[edges[:, 1]]) ** 2, 1)) return WeightedGraph(n, edges, d)
[docs]def knn(X, k=1): """returns the k-nearest-neighbours graph of the data Parameters ---------- X, array of shape (n_samples, n_features): the input data k, int, optional: is the number of neighbours considered Returns ------- the corresponding WeightedGraph instance Notes ----- The knn system is symmeterized: if (ab) is one of the edges then (ba) is also included """ from ..utils.fast_distance import euclidean_distance if np.size(X) == X.shape[0]: X = np.reshape(X, (np.size(X), 1)) try: k = int(k) except: "k cannot be cast to an int" if np.isnan(k): raise ValueError('k is nan') if np.isinf(k): raise ValueError('k is inf') k = min(k, X.shape[0] - 1) # create the distance matrix dist = euclidean_distance(X) sorted_dist = dist.copy() sorted_dist.sort(0) # neighbour system bool_knn = dist < sorted_dist[k + 1] bool_knn += bool_knn.T # xor diagonal bool_knn ^= np.diag(np.diag(bool_knn)) dist *= (bool_knn > 0) return wgraph_from_adjacency(dist)
[docs]def eps_nn(X, eps=1.): """Returns the eps-nearest-neighbours graph of the data Parameters ---------- X, array of shape (n_samples, n_features), input data eps, float, optional: the neighborhood width Returns ------- the resulting graph instance """ from ..utils.fast_distance import euclidean_distance if np.size(X) == X.shape[0]: X = np.reshape(X, (np.size(X), 1)) try: eps = float(eps) except: "eps cannot be cast to a float" if np.isnan(eps): raise ValueError('eps is nan') if np.isinf(eps): raise ValueError('eps is inf') dist = euclidean_distance(X) dist = np.maximum(dist, 1.e-16) dist[dist >= eps] = 0 # this would is just for numerical reasons dist -= np.diag(np.diag(dist)) return wgraph_from_adjacency(dist)
[docs]def lil_cc(lil): """ Returns the connected comonents of a graph represented as a list of lists Parameters ---------- lil: a list of list representing the graph neighbors Returns ------- label a vector of shape len(lil): connected components labelling Notes ----- Dramatically slow for non-sparse graphs """ n = len(lil) visited = np.zeros(n).astype(np.int) label = - np.ones(n).astype(np.int) k = 0 while (visited == 0).any(): front = [np.argmin(visited)] while len(front) > 0: pivot = front.pop(0) if visited[pivot] == 0: visited[pivot] = 1 label[pivot] = k front += lil[pivot] k += 1 return label
[docs]def graph_3d_grid(xyz, k=18): """ Utility that computes the six neighbors on a 3d grid Parameters ---------- xyz: array of shape (n_samples, 3); grid coordinates of the points k: neighboring system, equal to 6, 18, or 26 Returns ------- i, j, d 3 arrays of shape (E), where E is the number of edges in the resulting graph (i, j) represent the edges, d their weights """ if np.size(xyz) == 0: return None lxyz = xyz - xyz.min(0) m = 3 * lxyz.max(0).sum() + 2 # six neighbours n6 = [np.array([1, m, m ** 2]), np.array([m ** 2, 1, m]), np.array([m, m ** 2, 1])] # eighteen neighbours n18 = [np.array([1 + m, 1 - m, m ** 2]), np.array([1 + m, m - 1, m ** 2]), np.array([m ** 2, 1 + m, 1 - m]), np.array([m ** 2, 1 + m, m - 1]), np.array([1 - m, m ** 2, 1 + m]), np.array([m - 1, m ** 2, 1 + m])] # twenty-six neighbours n26 = [np.array([1 + m + m ** 2, 1 - m, 1 - m ** 2]), np.array([1 + m + m ** 2, m - 1, 1 - m ** 2]), np.array([1 + m + m ** 2, 1 - m, m ** 2 - 1]), np.array([1 + m + m ** 2, m - 1, m ** 2 - 1])] # compute the edges in each possible direction def create_edges(lxyz, nn, l1dist=1, left=np.array([]), right=np.array([]), weights=np.array([])): q = 0 for nn_row in nn: v1 = np.dot(lxyz, nn_row) o1 = np.argsort(v1) sv1 = v1[o1] nz = np.squeeze(np.nonzero(sv1[: - 1] - sv1[1:] == - l1dist)) o1z, o1z1 = o1[nz], o1[nz + 1] left = np.hstack((left, o1z, o1z1)) right = np.hstack((right, o1z1, o1z)) q += 2 * np.size(nz) weights = np.hstack((weights, np.sqrt(l1dist) * np.ones(q))) return left, right, weights i, j, d = create_edges(lxyz, n6, 1.) if k >= 18: i, j, d = create_edges(lxyz, n18, 2, i, j, d) if k == 26: i, j, d = create_edges(lxyz, n26, 3, i, j, d) i, j = i.astype(np.int), j.astype(np.int) # reorder the edges to have a more standard order order = np.argsort(i + j * (len(i) + 1)) i, j, d = i[order], j[order], d[order] return i, j, d
[docs]def wgraph_from_3d_grid(xyz, k=18): """Create graph as the set of topological neighbours of the three-dimensional coordinates set xyz, in the k-connectivity scheme Parameters ---------- xyz: array of shape (nsamples, 3) and type np.int, k = 18: the number of neighbours considered. (6, 18 or 26) Returns ------- the WeightedGraph instance """ if xyz.shape[1] != 3: raise ValueError('xyz should have shape n * 3') if k not in [6, 18, 26]: raise ValueError('k should be equal to 6, 18 or 26') i, j, d = graph_3d_grid(xyz, k) edges = np.vstack((i, j)).T return WeightedGraph(xyz.shape[0], edges, d)
[docs]def concatenate_graphs(G1, G2): """Returns the concatenation of the graphs G1 and G2 It is thus assumed that the vertices of G1 and G2 represent disjoint sets Parameters ---------- G1, G2: the two WeightedGraph instances to be concatenated Returns ------- G, WeightedGraph, the concatenated graph Notes ----- This implies that the vertices of G corresponding to G2 are labeled [G1.V .. G1.V+G2.V] """ V = G1.V + G2.V edges = np.vstack((G1.edges, G1.V + G2.edges)) weights = np.hstack((G1.weights, G2.weights)) G = WeightedGraph(V, edges, weights) return G
[docs]class WeightedGraph(Graph): """Basic weighted, directed graph class Member variables: * V (int): the number of vertices * E (int): the number of edges Methods * vertices (list, type=int, shape=(V,)): vertices id * edges (list, type=int, shape=(E,2)): edges as vertices id tuples * weights (list, type=int, shape=(E,)): weights / lengths of the graph's edges """ ### Constructor
[docs] def __init__(self, V, edges=None, weights=None): """ Constructor Parameters ---------- V : int (int > 0) the number of vertices edges : (E, 2) array, type int edges of the graph weights : (E, 2) array, type=int weights/lenghts of the edges """ Graph.__init__(self, V, edges=edges) if isinstance(weights, None.__class__): new_weights = [] else: new_weights = weights self.set_weights(new_weights)
[docs] def set_weights(self, weights): """ Set edge weights Parameters ---------- weights: array array shape(self.V): edges weights """ if np.size(weights) != self.E: raise ValueError('The weight size is not the edges size') else: self.weights = np.reshape(weights, (self.E))
[docs] def get_weights(self): return self.weights
[docs] def from_3d_grid(self, xyz, k=18): """Sets the graph to be the topological neighbours graph of the three-dimensional coordinates set xyz, in the k-connectivity scheme Parameters ---------- xyz: array of shape (self.V, 3) and type np.int, k = 18: the number of neighbours considered. (6, 18 or 26) Returns ------- E(int): the number of edges of self """ if xyz.shape[0] != self.V: raise ValueError('xyz should have shape n * 3, with n = self.V') if xyz.shape[1] != 3: raise ValueError('xyz should have shape n * 3') graph = graph_3d_grid(xyz, k) if graph is not None: i, j, d = graph else: raise TypeError('Creating graph from grid failed. '\ 'Maybe the grid is too big') self.E = np.size(i) self.edges = np.zeros((self.E, 2), np.int) self.edges[:, 0] = i self.edges[:, 1] = j self.weights = np.array(d) return self.E
[docs] def cut_redundancies(self): """ Returns a graph with redundant edges removed: ecah edge (ab) is present ony once in the edge matrix: the correspondng weights are added. Returns ------- the resulting WeightedGraph """ A = self.to_coo_matrix().tocsr().tocoo() return wgraph_from_coo_matrix(A)
[docs] def dijkstra(self, seed=0): """ Returns all the [graph] geodesic distances starting from seed x Parameters ---------- seed (int, >-1, <self.V) or array of shape(p) edge(s) from which the distances are computed Returns ------- dg: array of shape (self.V), the graph distance dg from ant vertex to the nearest seed Notes ----- It is mandatory that the graph weights are non-negative """ import heapq if hasattr(seed, '__iter__') == False: seed = [seed] try: if (self.weights < 0).any(): raise ValueError('some weights are non-positive') except: raise ValueError('undefined weights') dist, active = np.inf * np.ones(self.V), np.ones(self.V) idx, neighb, weight = self.compact_neighb() dist[seed] = 0 dg = list(zip(np.zeros_like(seed), seed)) heapq.heapify(dg) for j in range(self.V): end = False while True: if len(dg) == 0: end = True break node = heapq.heappop(dg) if active[node[1]]: break if end: break dwin, win = node active[win] = False # the folllowing loop might be vectorized l = neighb[idx[win]: idx[win + 1]] newdist = dwin + weight[idx[win]: idx[win + 1]] who = newdist < dist[l] for z in zip(newdist[who], l[who]): heapq.heappush(dg, z) dist[l[who]] = newdist[who] return dist
[docs] def compact_neighb(self): """ returns a compact representation of self Returns ------- idx: array of of shape(self.V + 1): the positions where to find the neighors of each node within neighb and weights neighb: array of shape(self.E), concatenated list of neighbors weights: array of shape(self.E), concatenated list of weights """ order = np.argsort(self.edges[:, 0] * float(self.V) + self.edges[:, 1]) neighb = self.edges[order, 1].astype(np.int) weights = self.weights[order] degree, _ = self.degrees() idx = np.hstack((0, np.cumsum(degree))).astype(np.int) return idx, neighb, weights
[docs] def floyd(self, seed=None): """ Compute all the geodesic distances starting from seeds Parameters ---------- seed= None: array of shape (nbseed), type np.int vertex indexes from which the distances are computed if seed==None, then every edge is a seed point Returns ------- dg array of shape (nbseed, self.V) the graph distance dg from each seed to any vertex Notes ----- It is mandatory that the graph weights are non-negative. The algorithm proceeds by repeating Dijkstra's algo for each seed. Floyd's algo is not used (O(self.V)^3 complexity...) """ if seed is None: seed = np.arange(self.V) dg = None for s in seed: if dg is None: dg = self.dijkstra(s) else: dg = np.vstack((dg, self.dijkstra(s))) return dg
[docs] def normalize(self, c=0): """ Normalize the graph according to the index c Normalization means that the sum of the edges values that go into or out each vertex must sum to 1 Parameters ---------- c=0 in {0, 1, 2}, optional: index that designates the way according to which D is normalized c == 0 => for each vertex a, sum{edge[e, 0]=a} D[e]=1 c == 1 => for each vertex b, sum{edge[e, 1]=b} D[e]=1 c == 2 => symmetric ('l2') normalization Notes ----- Note that when sum_{edge[e, .] == a } D[e] = 0, nothing is performed """ from scipy.sparse import dia_matrix c = int(c) if not c in [0, 1, 2]: raise ValueError('c must be equal to 0, 1 or 2') if self.E == 0: if c < 2: return np.zeros(self.V) else: return np.zeros(self.V), np.zeros(self.V) adj = self.to_coo_matrix().tocsr() s1 = adj.sum(0) s2 = adj.sum(1) if c == 1: s = dia_matrix((1. / s1, 0), shape=(self.V, self.V)) adj = adj * s self.weights = wgraph_from_adjacency(adj).get_weights() return np.asarray(s1) if c == 0: s = dia_matrix((1. / s2.T, 0), shape=(self.V, self.V)) adj = s * adj self.weights = wgraph_from_adjacency(adj).get_weights() return np.asarray(s2) if c == 2: s1 = dia_matrix((1. / np.sqrt(s1), 0), shape=(self.V, self.V)) s2 = dia_matrix((1. / np.sqrt(adj.sum(1)), 0), shape=(self.V, self.V)) adj = (s1 * adj) * s2 self.weights = wgraph_from_adjacency(adj).get_weights() return np.asarray(s1), np.asarray(s2)
[docs] def set_euclidian(self, X): """ Compute the weights of the graph as the distances between the corresponding rows of X, which represents an embdedding of self Parameters ---------- X array of shape (self.V, edim), the coordinate matrix of the embedding """ if np.size(X) == X.shape[0]: X = np.reshape(X, (np.size(X), 1)) if X.shape[0] != self.V: raise ValueError('X.shape[0] != self.V') if self.E > 0: d = np.sum((X[self.edges[:, 0]] - X[self.edges[:, 1]]) ** 2, 1) self.weights = np.sqrt(d)
[docs] def set_gaussian(self, X, sigma=0): """ Compute the weights of the graph as a gaussian function of the distance between the corresponding rows of X, which represents an embdedding of self Parameters ---------- X array of shape (self.V, dim) the coordinate matrix of the embedding sigma=0, float: the parameter of the gaussian function Notes ----- When sigma == 0, the following value is used: ``sigma = sqrt(mean(||X[self.edges[:, 0], :]-X[self.edges[:, 1], :]||^2))`` """ sigma = float(sigma) if sigma < 0: raise ValueError('sigma should be positive') self.set_euclidian(X) d = self.weights if sigma == 0: sigma = (d ** 2).mean() w = np.exp(- (d ** 2) / (2 * sigma)) self.weights = w
[docs] def symmeterize(self): """Symmeterize self, modify edges and weights so that self.adjacency becomes the symmetric part of the current self.adjacency. """ A = self.to_coo_matrix() symg = wgraph_from_adjacency((A + A.T) / 2) self.E = symg.E self.edges = symg.edges self.weights = symg.weights return self
[docs] def anti_symmeterize(self): """anti-symmeterize self, i.e. produces the graph whose adjacency matrix would be the antisymmetric part of its current adjacency matrix """ A = self.to_coo_matrix() symg = wgraph_from_adjacency((A - A.T) / 2) self.E = symg.E self.edges = symg.edges self.weights = symg.weights return self.E
[docs] def voronoi_labelling(self, seed): """ Performs a voronoi labelling of the graph Parameters ---------- seed: array of shape (nseeds), type (np.int), vertices from which the cells are built Returns ------- labels: array of shape (self.V) the labelling of the vertices """ import heapq if hasattr(seed, '__iter__') == False: seed = [seed] try: if (self.weights < 0).any(): raise ValueError('some weights are non-positive') except: raise ValueError('undefined weights') dist, active = np.inf * np.ones(self.V), np.ones(self.V) label = - np.ones(self.V, np.int) idx, neighb, weight = self.compact_neighb() dist[seed] = 0 label[seed] = np.arange(len(seed)) dg = list(zip(np.zeros_like(seed), seed)) heapq.heapify(dg) for j in range(self.V): end = False while True: if len(dg) == 0: end = True break node = heapq.heappop(dg) if active[node[1]]: break if end: break dwin, win = node active[win] = False # the folllowing loop might be vectorized for i in range(idx[win], idx[win + 1]): l, newdist = neighb[i], dwin + weight[i] if newdist < dist[l]: heapq.heappush(dg, (newdist, l)) dist[l] = newdist label[l] = label[win] return label
[docs] def cliques(self): """ Extraction of the graphe cliques these are defined using replicator dynamics equations Returns ------- cliques: array of shape (self.V), type (np.int) labelling of the vertices according to the clique they belong to """ if (self.weights < 0).any(): raise ValueError('cliques definition require a positive graph') cliques, size = - np.ones(self.V), np.zeros(self.V) adj = self.to_coo_matrix() for k in range(self.V): u = cliques < 0 w = np.zeros_like(u) # replicator dynamics iterations for q in range(self.V): w = u.copy() u = (adj * u) * w if u.sum() == 0: break u /= u.sum() if ((w - u) ** 2).sum() < 1.e-12: break # threshold the result threshold = 1. / max(2., 1. * np.sum(cliques == - 1)) cliques[u > threshold] = k if np.sum(u > threshold) == 0: break size[k] = np.sum(u > threshold) if cliques.min() > - 1: break # sort the labels size = size[size > 0] order = np.argsort(- size) label = cliques.copy() for k, vv in enumerate(order): cliques[label == vv] = k return cliques
[docs] def remove_trivial_edges(self): """ Removes trivial edges, i.e. edges that are (vv)-like self.weights and self.E are corrected accordingly Returns ------- self.E (int): The number of edges """ if self.E > 0: valid = self.edges[:, 0] != self.edges[:, 1] self.edges = self.edges[valid] self.weights = self.weights[valid] self.E = np.sum(valid) return self.E
[docs] def subgraph(self, valid): """ Creates a subgraph with the vertices for which valid>0 and with the correponding set of edges Parameters ---------- valid, array of shape (self.V): nonzero for vertices to be retained Returns ------- G, WeightedGraph instance, the desired subgraph of self Notes ----- The vertices are renumbered as [1..p] where p = sum(valid>0) when sum(valid==0) then None is returned """ if np.size(valid) != self.V: raise ValueError("incompatible size for self anf valid") if np.sum(valid > 0) == 0: return None if self.E > 0: win_edges = (valid[self.edges]).min(1) > 0 edges = self.edges[win_edges] weights = self.weights[win_edges] renumb = np.hstack((0, np.cumsum(valid > 0))) edges = renumb[edges] G = WeightedGraph(np.sum(valid > 0), edges, weights) else: G = WeightedGraph(np.sum(valid > 0)) return G
[docs] def kruskal(self): """ Creates the Minimum Spanning Tree of self using Kruskal's algo. efficient is self is sparse Returns ------- K, WeightedGraph instance: the resulting MST Notes ----- If self contains several connected components, will have the same number k of connected components """ k = self.cc().max() + 1 E = 2 * self.V - 2 V = self.V Kedges = np.zeros((E, 2)).astype(np.int) Kweights = np.zeros(E) iw = np.argsort(self.weights) label = np.arange(V) j = 0 for i in range(V - k): a, b = self.edges[iw[j]] d = self.weights[iw[j]] while label[a] == label[b]: j = j + 1 a, b = self.edges[iw[j]] d = self.weights[iw[j]] if label[a] != label[b]: lb = label[b] label[label == lb] = label[a] Kedges[2 * i] = np.array([a, b]) Kedges[2 * i + 1] = np.array([b, a]) Kweights[2 * i: 2 * i + 2] = d K = WeightedGraph(V, Kedges, Kweights) return K
[docs] def voronoi_diagram(self, seeds, samples): """ Defines the graph as the Voronoi diagram (VD) that links the seeds. The VD is defined using the sample points. Parameters ---------- seeds: array of shape (self.V, dim) samples: array of shape (nsamples, dim) Notes ----- By default, the weights are a Gaussian function of the distance The implementation is not optimal """ from .bipartite_graph import cross_knn # checks if seeds.shape[0] != self.V: raise ValueError("The numberof seeds is not as expected") if np.size(seeds) == self.V: seeds = np.reshape(seeds, (np.size(seeds), 1)) if np.size(samples) == samples.shape[0]: samples = np.reshape(samples, (np.size(samples), 1)) if seeds.shape[1] != samples.shape[1]: raise ValueError("The seeds and samples do not belong \ to the same space") #1. define the graph knn(samples, seeds, 2) j = cross_knn(samples, seeds, 2).edges[:, 1] #2. put all the pairs i the target graph Ns = np.shape(samples)[0] self.E = Ns self.edges = np.array( [j[2 * np.arange(Ns)], j[2 * np.arange(Ns) + 1]]).T self.weights = np.ones(self.E) #3. eliminate the redundancies and set the weights self.cut_redundancies() self.symmeterize() self.set_gaussian(seeds)
[docs] def show(self, X=None, ax=None): """ Plots the current graph in 2D Parameters ---------- X : None or array of shape (self.V, 2) a set of coordinates that can be used to embed the vertices in 2D. If X.shape[1]>2, a svd reduces X for display. By default, the graph is presented on a circle ax: None or int, optional ax handle Returns ------- ax: axis handle Notes ----- This should be used only for small graphs. """ if np.size(self.weights) == 0: return Graph.show() wm = self.weights.max() import matplotlib.pylab as mp if ax is None: mp.figure() ax = mp.subplot(1, 1, 1) ml = 5. if (X is None): for e in range(self.E): A = (self.edges[e, 0] * 2 * np.pi) / self.V B = (self.edges[e, 1] * 2 * np.pi) / self.V C = max(1, int(self.weights[e] * ml / wm)) mp.plot([np.cos(A), np.cos(B)], [np.sin(A), np.sin(B)], 'k', linewidth=C) t = (2 * np.pi * np.arange(self.V)) / self.V mp.plot(np.cos(t), np.sin(t), 'o', linewidth=ml) mp.axis([-1.1, 1.1, -1.1, 1.1]) return ax if (X.shape[0] != self.V): raise ValueError('X.shape(0)!=self.V') if np.size(X) == self.V: X = np.reshape(X, (self.V, 1)) if X.shape[1] == 1: # plot the graph on a circle x = np.pi * (X - X.min()) / (X.max() - X.min()) for e in range(self.E): A = x[self.edges[e, 0]] B = x[self.edges[e, 1]] C = max(1, int(self.weights[e] * ml / wm)) mp.plot([np.cos(A), np.cos(B)], [np.sin(A), np.sin(B)], 'k', linewidth=C) mp.plot(np.cos(x), np.sin(x), 'o', linewidth=ml) mp.axis([-1.1, 1.1, -0.1, 1.1]) if X.shape[1] > 2: Y = X.copy() from numpy.linalg import svd M1, M2, M3 = svd(Y, 0) Y = np.dot(M1, np.diag(M2)) Y = Y[:, :1] if X.shape[1] < 3: Y = X if Y.shape[1] == 2: for e in range(self.E): A = self.edges[e, 0] B = self.edges[e, 1] C = max(1, int(self.weights[e] * ml / wm)) mp.plot([Y[A, 0], Y[B, 0]], [Y[A, 1], Y[B, 1]], 'k', linewidth=C) mp.plot(Y[:, 0], Y[:, 1], 'o', linewidth=ml) xmin, xmax = Y[:, 0].min(), Y[:, 0].max() ymin, ymax = Y[:, 1].min(), Y[:, 1].max() xmin = 1.1 * xmin - 0.1 * xmax xmax = 1.1 * xmax - 0.1 * xmin ymin = 1.1 * ymin - 0.1 * ymax ymax = 1.1 * ymax - 0.1 * ymin mp.axis([xmin, xmax, ymin, ymax]) return ax
[docs] def remove_edges(self, valid): """ Removes all the edges for which valid==0 Parameters ---------- valid : (self.E,) array """ if np.size(valid) != self.E: raise ValueError("the input vector does not have the correct size") valid = np.reshape(valid, np.size(valid)) self.E = int(valid.sum()) self.edges = self.edges[valid != 0] self.weights = self.weights[valid != 0]
[docs] def list_of_neighbors(self): """ returns the set of neighbors of self as a list of arrays """ return self.to_coo_matrix().tolil().rows.tolist()
[docs] def copy(self): """ returns a copy of self """ G = WeightedGraph(self.V, self.edges.copy(), self.weights.copy()) return G
[docs] def left_incidence(self): """ Return left incidence matrix Returns ------- left_incid: list the left incidence matrix of self as a list of lists: i.e. the list[[e.0.0, .., e.0.i(0)], .., [e.V.0, E.V.i(V)]] where e.i.j is the set of edge indexes so that e.i.j[0] = i """ linc = [] for i in range(self.V): linc.append([]) for e in range(self.E): i = self.edges[e, 0] a = linc[i] a.append(e) return linc
[docs] def right_incidence(self): """ Return right incidence matrix Returns ------- right_incid: list the right incidence matrix of self as a list of lists: i.e. the list[[e.0.0, .., e.0.i(0)], .., [e.V.0, E.V.i(V)]] where e.i.j is the set of edge indexes so that e.i.j[1] = i """ rinc = [] for i in range(self.V): rinc.append([]) for e in range(self.E): i = self.edges[e, 1] a = rinc[i] a.append(e) return rinc
[docs] def is_connected(self): """ States whether self is connected or not """ if self.V < 1: raise ValueError("empty graph") if self.V < 2: return True if self.E == 0: return False cc = self.cc() return int(cc.max() == 0)
[docs] def to_coo_matrix(self): """ Return adjacency matrix as coo sparse Returns ------- sp: scipy.sparse matrix instance that encodes the adjacency matrix of self """ if self.E > 0: i, j = self.edges.T sm = coo_matrix((self.weights, (i, j)), shape=(self.V, self.V)) else: sm = coo_matrix((self.V, self.V)) return sm