'Python Connected Components edges list

I use these algorithms in python for finding connected components from edges.

components = []

def connected_components(pairs):
    for a, b in pairs:
        for component in components:
            if a in component:
                for i, other_component in enumerate(components):
                    if b in other_component and other_component != component: # a, and b are already in different components: merge
                        component.extend(other_component)
                        components[i:i+1] = []
                        break # we don't have to look for other components for b
                else: # b wasn't found in any other component
                    if b not in component:
                        component.append(b)
                break # we don't have to look for other components for a
            if b in component: # a wasn't in in the component 
                component.append(a)
                break # we don't have to look further
        else: # neither a nor b were found
            components.append([a, b])
    return components

This algorithms return components like this :

[ [n1,n2,n4],[n3,n5] ]

I would like to have the list of all edges in connected components like this :

[ [(n1,n2),(n2,n4),(n4,n1)],[(n3,n5)] ] 

in the same order of the previous list but i don't know how creates this list

Thank you for your help.



Solution 1:[1]

Note: This doesn't require any python dependency.

I will share my approach, with recursive depth-first search. I am assuming graph is bi-directional and the following code can be easily manipulated for directed graph.

pairs = [] // edge list
adj_list = {} // adjacency list
vis = [] // visited_list
connected_components = [] // contains all the connected components
temp_component = [] 

// basic depth first search
def dfs( node ):
     vis[node] = "true"
     temp_component.append(node)
     for neighbour in adj_list[node]:
       if vis[neighbour] == "false":
          dfs(neigbour)

//main
for a,b in pairs:
 if a not in adj_list:
  adj_list[a] = [] 
 if b not in adj_list:
  adj_list[b] = []
 adj_list[a].append(b)
 adj_list[b].append(a)
 vis["a"] = "false"
 vis["b"] = "false" 

for a,b in pairs:
  temp_component = []
  if vis[a] == "false":
   dfs(a)
 if len(temp_component) > 0:
   connected_components.append(temp_component)

 // once you have connected components you can get the edge lists in connected component as well
 answer = [] 
 for component in connected_components:
    temp_pairs = [] // contains the pair of edges for the current connected component
     for node in component:
        for i,j in pairs: 
          if (node == i or node == j) and (i,j) not in temp_node: 
              temp_node.append(i,j)
     answer.append(temp_pairs) 

Solution 2:[2]

Create a mini graph in python using apgl library. You can use SparseGraph module from apgl. from apgl.graph import SparseGraph

Initiate a sparsegraph with number of nodes required. graph = SparseGraph(num_vertices)

Then you can create a mini graph by adding edges between nodes of graph. graph.addEdge(component1, component2)

Then just use findConnectedComponents function to find connected components. graph.findConnectedComponents()

Solution 3:[3]

I know this is an old question, but I came across the same problem and was not glad with the performance of the given answers. So I wanted to share my own solution using scipy's connected_components and shortest_path functions to handle arbitrary edge-soups.

from scipy.sparse import csr_matrix, csgraph
import numpy as np

def coords_to_indices(coords):
    """
    Decompose a set of primitives defined by vertex-coordinates to their shared vertices and primitive indices

    Parameters
    ----------
    coords : array like
        A NxMxD array, where N is the number of primitives, M the number of vertices per primitive and D the number of dimensions of each vertex

    Returns
    -------
    vertices : NDArray
        UxD array containing the unique vertices
    primitives : NDArray
        NxM array containing the indices of vertices building the primitives
    """

    coords = np.asarray(coords)
    vert_dim = coords.shape[-1]
    prim_dim = coords.shape[1]
    vertices, rev_indx = np.unique(coords.reshape((-1, vert_dim)), axis=0, return_inverse=True)
    primitives = rev_indx.reshape((coords.shape[0], prim_dim))
    return vertices, primitives


def is_ordered(edge_indices) -> bool:
    """
    Check if the edges are ordered or not

    Parameters
    ----------
    edge_indices : Nx2 array of indices
        Indices of the edge vertices

    Returns
    -------
    bool
        True, if all edges are in order. That means, every edge is connected with the following one by its second vertex.
    """

    edge_indices = np.asarray(edge_indices)
    e_flat = edge_indices.flatten()[1:-1]
    return all(e_flat[1::2] - e_flat[::2] == 0)


def reorder_edges(connected_edges):
    """
    Reorder an unsorted list of edges (indices Nx2) or coordinates (Nx2xM) so that:
        - Each edge is connected to the next edge
        - The connection is made from the second vertex to the first vertex of the following edge

    Parameters
    ----------
    connected_edges : array-like, Nx2 indices or Nx2xM coordinates
        edges that build the segment. all edges have to be connected, but every vertex has to be shared by exactly 2 edges.
        If the segment is not closed, two vertices are allowed to rise up in only one edge (the ends).

    Returns
    -------
    NDArray - like the input
     The edges, reordered
    """

    connected_edges = np.asarray(connected_edges)
    if is_ordered(connected_edges):
        return connected_edges

    if connected_edges.ndim == 3 and connected_edges.shape[1] == 2 and np.issubdtype(connected_edges.dtype, np.floating):
        # vertex coordinates given, transform to edge indices and back
        verts, edges = coords_to_indices(connected_edges)
        e_ordered = reorder_edges(edges)
        return verts[e_ordered]

    assert np.issubdtype(connected_edges.dtype, np.integer) and connected_edges.ndim == 2 and connected_edges.shape[1] == 2, "Wrong shape"

    edges = connected_edges

    n_edges = edges.shape[0]
    n_verts = edges.max() + 1

    # find the unique indices and counts of the vertices
    idxs, counts = np.unique(connected_edges.flat, return_counts=True)

    # translate edges to monotone space

    if np.all(counts == 2):
        # cyclic contour (all vertices are counted twice)
        # wo have to cut the cycle and restore it afterwards. Otherwise, its hard to follow the two valid paths
        edges = edges[1:]
        n_edges -= 1
        new_edges = reorder_edges(edges)
        # add the missing piece. Due to the strict order of the indices, this should always be between the very first and very last index
        new_edges = np.row_stack((new_edges, (new_edges[-1, -1], new_edges[0, 0])))
        return new_edges

    # open contour
    # find the open ends in the chain of segments
    ends = idxs[counts == 1]

    assert len(ends) == 2, "More than 2 unconnected segments found. not a contiguous contour"

    # lets begin the connection walk on one of the end-segments. I choose the minimum, so maybe the indices are rising again
    start_index, end_index = np.sort(ends)

    # build sparse matrix of edge relations
    csm = csr_matrix((np.full(n_edges, 1, dtype=np.bool_), (edges[:, 0], edges[:, 1])), (n_verts, n_verts))

    # get shortest path and number of hops
    n_hops, prev_idx = csgraph.shortest_path(csm, directed=False, indices=start_index, return_predecessors=True, unweighted=True)

    # limit to the existing vertices
    n_hops = n_hops[idxs]
    prev_idx = prev_idx[idxs]
    vert_order = np.argsort(n_hops)

    assert np.all(np.isfinite(n_hops)), "Unreachable parts detected"

    # check that the hops are increasing monotonously, otherwise something went wrong
    dst = n_hops[vert_order]
    assert np.all(dst[1:] - dst[:-1] == 1), "path not contiguous... something went wrong"

    # get indices of neighbors
    ordered_idxs = prev_idx[vert_order][1:]
    # add the farthest node
    ordered_idxs = np.append(ordered_idxs, end_index)

    new_edges = np.column_stack((ordered_idxs[:-1], ordered_idxs[1:]))

    return new_edges


def find_connected(edges):
    """
    Find and return connected components in a soup of edges. Each component will be returned as an ordered list of edges

    Parameters
    ----------
    edges : array_Like
        A list of edges. Either as a Nx2 list of indices or as a Nx2xD list of vertex coordinates per edge

    Returns
    -------
    List[NDArray]
        A list of Mx2 or Mx2xD arrays (depending on the input type), each containing an ordered list of connected (and maybe cyclic) edges
    """

    if edges.ndim == 3 and edges.shape[1] == 2 and np.issubdtype(edges.dtype, np.floating):
        # vertex coordinates given, transform to edge indices and back
        verts, edges = coords_to_indices(edges)
        connected_edges = find_connected(edges)
        # return the result as coordinate array
        return [verts[e] for e in connected_edges]

    # get number of edges and maximum number of vertices
    n_edges = edges.shape[0]
    n_verts = edges.max() + 1

    # create a sparse matrix of the relations
    csm = csr_matrix((np.full(n_edges, 1, dtype=np.bool_), (edges[:, 0], edges[:, 1])), (n_verts, n_verts))

    # get number and labels of the connected components. labels refer to the vertices
    n_c, labels = csgraph.connected_components(csm, directed=False, return_labels=True)

    # get the association to the edges. should not matter which column
    edge_labels = labels[edges]
    assert np.all(edge_labels[:, 0] == edge_labels[:, 1])

    connected_edges = []
    for label in range(n_c):
        # get mask for current label:
        edge_mask = edge_labels[:, 0] == label
        if not np.any(edge_mask):
            #this vertex was no member of any edge
            continue

        # iterate labels and gather all edges of that label
        edges_l = edges[edge_mask, :]

        # reorder if necessary
        if not is_ordered(edges_l):
            edges_l = reorder_edges(edges_l)

        connected_edges.append(edges_l)

    return connected_edges

Short Example:

def makePoly(npts=360, r=10):
    angles = np.linspace(0, 2*np.pi, npts)
    x = np.sin(angles) * r
    y = np.cos(angles) * r
    return x, y
    
    
# create a polygon
verts = np.array(makePoly(16)).T
# create the edges
edges = np.column_stack((np.arange(verts.shape[0]), np.arange(verts.shape[0])+1))
# close the loop
edges[-1, -1] = 0

# shuffe all edges
np.random.seed(123)
np.random.shuffle(edges)
# remove some edges, so we get multiple chains
edges = np.delete(edges, [1, 2, 3], axis=0)
# swap some vertex orders
swap = [4, 5, 6, 7, 8, 9]
edges[swap, :] = edges[swap, :][:, [1, 0]]
# now reorder everything
edges_ordered = find_connected(edges)
print(edges_ordered)

Result is

[
array([[ 0, 15],
       [15, 14],
       [14, 13],
       [13, 12],
       [12, 11]]),

array([[1, 2],
       [2, 3],
       [3, 4]]), 

array([[ 5,  6],
       [ 6,  7],
       [ 7,  8],
       [ 8,  9],
       [ 9, 10]])
]

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1
Solution 2 Ankur Yadav
Solution 3