from igraph import *
from plfit import *
import sys
# Read in the graph in GML format
g = read("wikipedia.gml",format="gml")
#g = Graph.Erdos_Renyi(n=300, m=250)
# Summary information about the graph
summary(g)
# Undirected degree distribution (the GML file itself is directed)
degrees = g.degree(mode="all")
print 'Degree distribution: [' + str(min(degrees)) + ',' + str(max(degrees)) + ']'
# Fit the power-law distribution. If $D < 0.05, the Kolmogorov Smirnov test tells
# us that the distribution is power-law.
# Also, look for the estimated power-law exponent $alpha and $xmin (the point at
# which you should start fitting the distribution)
# make sure you have executed all the code in plfit.R before running this function
# an explanation is here: http://tuvalu.santafe.edu/~aaronc/powerlaws/
# you can download the file directly here: http://tuvalu.santafe.edu/~aaronc/powerlaws/plfit.r
# You will also need the VGAM R package which you can download via the installer
[alpha, xmin, L]= plfit(degrees)
print 'Power-law degree distribution: ' + str(alpha)
print ' - xmin: ', xmin
print ' - L: ', L
sys.stdout.flush()
# plot the cumulative empirical distribution
#cumy = c()
#y = tabulate(degrees)
#x = 1:length(y)
#for (i in 1:length(x)) {
# cumy[i] = sum(y[i:length(x)])/sum(y)
#}
#options(scipen=10)
#plot(x,cumy,log="xy",xlab="degree k",ylab="P(x) >= k",cex=0.5)
# overlay the fitted distribution
#startval = cumy[a$xmin]
#fittedvals = (a$xmin:max(x))^(-a$alpha + 1)*(startval)/a$xmin^(-a$alpha + 1)
#points(a$xmin:max(x),fittedvals,type='l',col='red')
# In and out degrees separately
# use the degree() function and options for calculating directed degree
# see the documentation here: http://igraph.sourceforge.net/documentation.html
# In-degree
# MAX: Geometry
indegrees = g.indegree() # g.degree(mode="IN")
print 'Max in-degree: ', max(indegrees)
print '- Node: ', g.vs.select(_indegree = max(indegrees))["label"]
sys.stdout.flush()
# Out-degree
# MAX: List of mathematics articles
outdegrees = g.outdegree() # g.degree(mode="out")
print 'Max in-degree: ', max(outdegrees)
print '- Node: ', g.vs.select(_outdegree = max(outdegrees))["label"]
sys.stdout.flush()
# see which nodes have the max out and indegree
# for example, if you were to store the outdegree in the vector od, you could look up the page name like so:
#V(g)$label[which.max(od)]
# find undirected betweenness scores and then nodes with the max betweenness
# warning, can be slow with large graphs, you may consider betweenness.estimate instead
bb = g.betweenness(directed=True) # -> Calculus (1m 43s)
# bb = g.betweenness(directed=False) # -> Judaism (5m)
maxindex, maxvalue = max(enumerate(bb), key=operator.itemgetter(1))
print 'Max betweenness: ', maxvalue
print '- Node: ', g.vs[maxindex]["label"]
sys.stdout.flush()
# this high betweennes node may seem a bit surprising
# you can check out its neighbors like this. The +1, -1 business is a real
# pain. It is because R indexes from 1 onward, but igraph likes to number
# its vertices starting with 0. So you have to do a back and forth dance
# V(g)$label[V(g)[nei(which.max(bb)-1)]+1]
# calculate Page Rank and find the node having the highest pagerank
# you'll want the $vector portion of the answer returned
# the assignment doesn't ask about this, but it's good to know how to do this...
# pr = g.pagerank()
# V(g)$label[which.max(pr$vector)]
# calculate the Bonacich alpha-centrality of a lattice
# I'm not quite convinced that igraph calculates these correctly, but the behavior makes sense
# to me for these values of alpha
#glfour = graph.lattice( c(4,4) )
#ac = glfour.alphacentrality(glfour,alpha=-0.5)
#plot(glfour,layout=layout.kamada.kawai,vertex.size = 20*ac/(max(ac)-min(ac)))
#ac = alpha.centrality(glfour,alpha=+0.25)
#plot(glfour,layout=layout.kamada.kawai,vertex.size = 20*ac/(max(ac)-min(ac)))