'How to properly plot a tree (27k nodes) using a circular tree / leave layout
I have this (unbalanced) tree with 27k+ nodes. It is an hierachy. Now I would to plot it as such to obtain a plot something like this (no idea how you would describe it, but I would call it a circular leave tree...?
However, I am unable to achieve such a result unfortunately. I have already tried igraph
, Networkx
, Maltego
, Graphviz
, Gephi
. Hopefully someone can help me / give me tips & tricks or hints.
Maltego
Gives me quiet easily the following. However I cannot export it to pdf. Furthermore it has this wierd expansion on the top going to the left. I would be able to 'manually' (minimize) move it. However that is not what I would like.
This is btw in my view (with the manual fix) the best result. But I cannot export it to vector or high-res image.
igraph
import igraph as ig
bigGraphAsTupleList = (('a','b'),('b','c'),('b','d'), ..., ('c','e'))
g = ig.Graph.TupleList(bigGraphAsTupleList)
layout = g.layout("rt_circular") #fr (fruchterman reingold), tree, circle, rt_circular (reingold_tilford_circular)
# bbox = size of picture
ig.plot(g,layout=layout,bbox=(10000,10000),target='mygraph.png')
This gives me something like below.
fruchterman reingold (so much overlap of nodes and connections)
Networkx
import networkx as nx
import matplotlib.pyplot as plt
G = nx.Graph()
G.add_edge(...) #build graph
nx.draw_circular(G) #nx.draw_spring(G) #nx.draw_spectral
plt.draw()
plt.show()
Graphviz (via Networkx)
Also a wierd result a bit similar as the next one Maltego. However
import networkx as nx
import matplotlib.pyplot as plt
import pydot
from networkx.drawing.nx_pydot import graphviz_layout
G = nx.Graph()
G.add_edge(...) #build graph
pos = graphviz_layout(G, prog="circo")
plt.figure(1,figsize=(60,60))
nx.draw(G, pos,node_size=10)
plt.show(block=False)
plt.savefig("Graph.png", format="PNG")
Solution 1:[1]
Here is my Python solution using the neato layout engine and the pygraphviz package:
from collections import defaultdict
from pygraphviz import AGraph
def find_min_root(G):
deg = {n : len(e) for (n, e) in G.items()}
worklist = [n for n in deg if deg[n] == 1]
remain = len(G)
while remain > 2:
remain -= len(worklist)
worklist2 = []
for n1 in worklist:
for n2 in G[n1]:
deg[n2] -= 1
if deg[n2] == 1:
worklist2.append(n2)
worklist = worklist2
return worklist
def build_tree(G, n1, visited, tree):
for n2 in G[n1]:
if n2 not in visited:
visited.add(n2)
tree[n1].add(n2)
build_tree(G, n2, visited, tree)
def compute_height(T, n1, heights):
h = heights.get(n1)
if h is None:
children = T[n1]
if not children:
h = 0
else:
h = max([compute_height(T, n2, heights)
for n2 in children]) + 1
heights[n1] = h
return h
def compute_descendants(T, n1, descendants):
d = descendants.get(n1)
if d is None:
children = T[n1]
if not children:
d = 0
else:
d = sum([compute_descendants(T, n2, descendants) + 1
for n2 in children])
descendants[n1] = d
return d
def edge_attrs(n_desc, child_height):
w = 1
if child_height == 0:
pw = 0.25
if n_desc < 20:
l = 0.08
c = '#ffddcc'
elif n_desc < 40:
l = 0.15
c = '#ffddcc'
elif n_desc < 300:
l = 0.20
c = '#ffddcc'
elif n_desc < 500:
l = 0.35
c = '#ffffcc'
else:
l = 0.6
c = '#ddccff'
elif child_height == 1:
c = '#77dd55'
pw = 0.5
if n_desc < 100:
l = 0.4
elif n_desc < 400:
l = 0.6
else:
l = 0.8
elif child_height == 2:
c = '#33bb33'
pw = 0.75
if n_desc < 1000:
l = 0.4
else:
l = 0.8
elif child_height == 3:
c = '#119911'
if n_desc < 1000:
l = 0.4
elif n_desc < 5000:
l = 0.8
else:
l = 3.0
pw = 1.0
else:
c = '#007700'
if n_desc < 1000:
l = 0.4
elif n_desc < 5000:
l = 0.8
else:
l = 3.0
pw = 1.25
return c, l, w, pw
def main():
lines = open('27k.csv').readlines()[1:]
edges = [l.strip().split(',') for l in lines]
# Build undirected graph
G = defaultdict(set)
for child, parent in edges:
G[parent].add(child)
G[child].add(parent)
# Find best root node
root = find_min_root(G)[0]
T = defaultdict(set)
build_tree(G, root, {root}, T)
heights = {}
for n in list(T):
compute_height(T, n, heights)
descendants = {}
for n in T:
compute_descendants(T, n, descendants)
G = AGraph(strict = False, directed = False)
graph_attrs = {
'dpi' : 300
}
G.graph_attr.update(graph_attrs)
node_attrs = {
'shape' : 'point',
'width' : 0.01,
'color' : '#333333'
}
G.node_attr.update(node_attrs)
for n1, edges in T.items():
n_desc = descendants[n1]
for n2 in edges:
c, l, w, pw = edge_attrs(n_desc, heights[n2])
G.add_edge(n1, n2, len = l, color = c, weight = w,
penwidth = pw)
G.draw('tree.png', prog = 'neato')
if __name__ == '__main__':
main()
Your data file isn't a tree so I first convert it into a undirected graph and then I create a well-balanced tree using that graph. Then all that remains is to set the len attribute on every edge to nudge neato into creating a good-looking layout.
Colors and pen widths is just to make the output pretty. You can tweak
it by changing the code in edge_attrs
.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | Björn Lindqvist |