MW example
From InterSciWiki
###################################################### #### Cohesive Blocking Algorithm and Utilities #### ###################################################### #### Written by Peter McMahan #### ######################################################
require(RBGL) ## OK to not have RBGL, but it speeds some stuff up # Need RSQLite too require(igraph, warn.conflicts = FALSE, quietly=TRUE) require(digest)
###################################################### #### A note on parallelization #### ###################################################### ## Many of these functions have an optional argument## ## `cl`. If used, this should be a cluster object ## ## from the package "snow". I used "Rmpi" to use ## ## this functionality. ## ######################################################
###################################################### ## Main cohesive blocking function. Feed it a graph ## ## of class igraph and it returns an object of ## ## class bgraph. Definitely requires digest, and ## ## will use RSQLite for large graphs if it is avai- ## ## lable. ## ######################################################
cohesive.blocks <- function(G,db=NULL, useDB=NULL,cl=NULL){
aigraph <- "package:igraph" %in% search()
if(aigraph){
silentwarnings <- capture.output(
detach(package:igraph,unload=TRUE)
)
}
anetwork <- "package:network" %in% search()
if(!anetwork){
silentwarnings <- capture.output(
require(network, warn.conflicts = FALSE, quietly=TRUE)
)
}
if(is.null(useDB)){
useDB <- network.size(G)>400 & require(RSQLite)
}
if(useDB & !require(RSQLite)) stop("package `RSQLite` required")
if(!require(digest)) stop("package `digest` required")
if(!is.network(G)) stop("g must be an network object")
G <- network2igraph(G)
Gin <- G
G <- simplify(as.undirected2(G))
V(G)$cbid <- as.numeric(V(G))
infCohesion <- vcount(G)+1 ## for fully connected subcomponents
g <- G
v <- sort(as.numeric(V(g)))
## set up database connection and schema
if(is.null(db) & useDB){
## initialize the database and define schema
dbfile <- tempfile("cohesive.blocks",getwd())
con <- dbConnect(dbDriver("SQLite"),dbname=dbfile)
## branchMembership
dbSendQuery(con,"create table branchMembership (vertexId integer, branchId integer)")
dbSendQuery(con,"create index index_membership on branchMembership (vertexId, branchId)")
dbSendQuery(con,"create index index_vertex on branchMembership (vertexId)")
dbSendQuery(con,"create index index_branch on branchMembership (branchId)")
## branches (unexamined branches given cohesion of (-1))
dbSendQuery(con,"create table branches (branchId integer primary key, membershipHash text, branchCohesion integer default -1, maxAncestorCohesion integer, isCohesiveBlock integer default 0)")
dbSendQuery(con,"create index index_hash on branches (membershipHash)")
## subBranches
dbSendQuery(con,"create table subBranches (parentId integer, childId integer)")
dbSendQuery(con,"create index index_parentId on subBranches (parentId)")
##
## Add the root branch to the whole graph
##
## create the hash
v <- sort(as.numeric(V(g)))
thisHash <- digest(paste(paste(v,collapse=" "),0))
dbBeginTransaction(con)
dbSendQuery(con,paste("insert into branches (branchId,membershipHash,maxAncestorCohesion,isCohesiveBlock) values(",0,",'",thisHash,"',",0,",",1,"); ",sep=""))
for(thisV in v){
dbSendQuery(con,paste("insert into branchMembership values(",thisV,",",0,"); ",sep=""))
}
dbCommit(con)
maxId <- 0
}else if(useDB){
## connect to existing db
dbfile <- db
con <- dbConnect(dbDriver("SQLite"),dbname=dbfile)
maxId <- dbGetQuery(con,"select max(branchId) from branches")
}else{
branchMembership <- data.frame(vertexId=sort(as.numeric(V(G))),branchId=0)
thisHash <- digest(paste(paste(v,collapse=" "),0))
branches <- data.frame(branchId=0,membershipHash=thisHash,branchCohesion=-1,maxAncestorCohesion=0,isCohesiveBlock=1)
subBranches <- data.frame(parentId=NA,childId=NA)
}
## now that setup's done, start looping
notDone <- T
while(notDone){
## which branch are we working on this time
## DATA ACCESS:
if(useDB){
branchId <- as.numeric(dbGetQuery(con,"select branchId from branches where branchCohesion=-1 limit 1"))
}else{
branchId <- branches$branchId[branches$branchCohesion==-1][1]
}
cat("Branch ",branchId,": ",sep="")
## get data and make g
## DATA ACCESS:
if(useDB){
v <- sort(as.numeric(unlist(dbGetQuery(con,paste("select vertexId from branchMembership where branchId=",branchId,sep=""))1)))
maxId <- as.numeric(dbGetQuery(con,"select max(branchId) from branches"))
}else{
v <- sort(branchMembership$vertexId[branchMembership$branchId==branchId])
maxId <- max(branches$branchId)
}
g <- subgraph(G,V(G)[cbid %in% v])
## check if trivial or fully connected, else treat normally
if(vcount(g) < 3){ #trivial
cat("Branch single or binary -- no cohesive subgroups.\n")
k <- 0
if(useDB){
dbSendQuery(con,paste("update branches set branchCohesion=",k," where branchId=",branchId,";",sep=""))
notDone <- dbGetQuery(con,"select count(*) from branches where branchCohesion=-1")>0
#dbSendQuery(con,paste("update branches set branchCohesion=0 where branchId=",branchId,";",sep=""))
}else{
branches$branchCohesion[branches$branchId==branchId] <- k
notDone <- any(branches$branchCohesion==-1)
}
}else if(all(degree(g)>=vcount(g))){ #fully connected
cat("Branch fully connected -- setting cohesion to ",infCohesion,".\n",sep="")
k <- infCohesion
if(useDB){
dbSendQuery(con,paste("update branches set branchCohesion=",k," where branchId=",branchId,";",sep=""))
notDone <- dbGetQuery(con,"select count(*) from branches where branchCohesion=-1")>0
#dbSendQuery(con,paste("update branches set branchCohesion=",infCohesion,", isCohesiveBlock=1 where branchId=",branchId,";",sep=""))
}else{
branches$branchCohesion[branches$branchId==branchId] <- k
notDone <- any(branches$branchCohesion==-1)
}
}else{ #keep looking
## find cohesion of g
if(!is.connected(g)){
k <- 0
}else if(min(degree(g))==1){
k <- 1
}else{
k <- graph.cohesion(g)
}
cat(k,"-cohesive; ",sep="")
## trim it down:
if(useDB){
mac <- max(k,unlist(dbGetQuery(con,paste("select maxAncestorCohesion from branches where branchId =",branchId,";"))))
}else{
mac <- max(k,unlist(branches$maxAncestorCohesion[branches$branchId==branchId]))
}
cat(length(v))
while(any(degree(g)<=mac)){ ## trimming goes on here
g <- subgraph(g,V(g)[degree(g)>mac])
}
cat("(",vcount(g),") vertices; ",sep="")
## find k-components
## For nastier graphs:
## check if cohesion after trimming is greater than k. If so, kcomp <- list(as.numeric(V(g)))
if(vcount(g)>100 & k>1){ ## only try speedup for relatively complicated kComponents() calls
if(!is.connected(g)){
newk <- 0
}else if(min(degree(g))==1){
newk <- 1
}else{
newk <- graph.cohesion(g)
}
if(newk>k){
kcomp <- list(as.numeric(V(g)))
}else{
kcomp <- kComponents(g,k,cl)
}
}else{ ## otherwise just use kComponents()
kcomp <- kComponents(g,k,cl)
}
kcomp <- lapply(kcomp,function(thisV){V(g)[thisV]$cbid})
cat(length(kcomp)," sub-branches, ",sep="")
## which have been searched?
theseHashes <- lapply(kcomp,function(x){digest(paste(paste(sort(unlist(x)),collapse=" "),mac))})
## DATA ACCESS:
if(useDB){
searched <- dbGetQuery(con,paste("select branchId, membershipHash from branches where membershipHash in ('",paste(theseHashes,collapse="','"),"');",sep=""))
}else{
searched <- branches[c("branchId","membershipHash")][branches$membershipHash %in% theseHashes,]
}
searchedCount <- nrow(searched)
## assign appropriate branchIds
branchIds <- as.numeric(rep(NA,length(kcomp)))
isNew <- rep(T,length(kcomp))
if(nrow(searched)>0){
branchIds[ind <- match(searched$membershipHash,theseHashes)] <- searched$branchId
branchIds[-ind] <- (maxId+1):(maxId+length(branchIds)-length(ind))
isNew[ind] <- F
}else if(length(branchIds)>0){
branchIds <- (maxId+1):(maxId+length(branchIds))
}
## DATA ACCESS:
## add all branches to `subBranches` and only new branches to `branches` and `branchMembership`
if(useDB){
dbBeginTransaction(con)
if(length(kcomp)>0){for(i in 1:length(kcomp)){
dbSendQuery(con,paste("insert into subBranches values(",branchId,",",branchIds[i],");",sep=""))
if(isNew[i]){ ## if new insert into branches branchMembership
dbSendQuery(con,paste("insert into branches (branchId,membershipHash,maxAncestorCohesion) values(",branchIds[i],",'",theseHashes[i],"',",mac,");",sep=""))
for(thisV in sort(unlist(kcompi))){
dbSendQuery(con,paste("insert into branchMembership values(",thisV,",",branchIds[i],"); ",sep=""))
}
}
}}
dbSendQuery(con,paste("update branches set branchCohesion=",k," where branchId=",branchId,";",sep=""))
dbCommit(con)
notDone <- dbGetQuery(con,"select count(*) from branches where branchCohesion=-1")>0
}else{
if(length(kcomp)>0){
subBranches <- rbind(subBranches,data.frame(parentId=rep(branchId,length(branchIds)),childId=branchIds))
branches <- rbind(branches,data.frame(branchId=branchIds,membershipHash=unlist(theseHashes),branchCohesion=-1,maxAncestorCohesion=mac,isCohesiveBlock=0)[isNew,])
branchMembership <- rbind(branchMembership,data.frame(vertexId=unlist(kcomp[isNew]),branchId=rep(branchIds[isNew],lapply(kcomp[isNew],length))))
}
branches$branchCohesion[branches$branchId==branchId] <- k
notDone <- any(branches$branchCohesion==-1)
}
cat("(",searchedCount," complete);\n",sep="")
}
}
## mark the cohesive blocks
if(useDB){
dbSendQuery(con,"update branches set isCohesiveBlock=1 where maxAncestorCohesion < branchCohesion")
}else{
branches$isCohesiveBlock[branches$maxAncestorCohesion < branches$branchCohesion] <- 1
}
## and extract the data we need
if(useDB){
cBlocks <- dbGetQuery(con,"select * from branches where isCohesiveBlock>0")
cbMembership <- dbGetQuery(con,paste("select * from branchMembership where branchId in (",paste(cBlocks$branchId,collapse=","),");",sep=""))
subBranches <- dbReadTable(con,"subBranches")
lapply(dbListResults(con),dbClearResult) ## just-in-case cleanup
dbDisconnect(con)
}else{
cBlocks <- branches[branches$isCohesiveBlock>0,]
cbMembership <- branchMembership[branchMembership$branchId %in% cBlocks$branchId,]
subBranches <- subBranches[!(is.na(subBranches$parentId) | is.na(subBranches$childId)),]
}
## get block members and cohesion
blocks <- list()
block.cohesion <- numeric()
for(i in cBlocks$branchId){
blocks <- c(blocks,list(cbMembership$vertexId[cbMembership$branchId==i]))
block.cohesion <- c(block.cohesion,cBlocks$branchCohesion[cBlocks$branchId==i])
}
## build the block hierarchy graph. first the vertices and attributes:
tree <- graph.empty(nrow(cBlocks))
V(tree)$branchId <- cBlocks$branchId
V(tree)$nodes <- blocks
V(tree)$chnodes <- lapply(blocks,paste,collapse=",")
V(tree)$cohesion <- block.cohesion
## and now the edges:
for(v in V(tree)){
p <- find.cohesive.parents(V(tree)[v]$branchId,cBlocks$branchId,subBranches)
e <- rep(v,times=2*length(p))
e[(1:length(p))*2-1] <- V(tree)[match(p,V(tree)$branchId)-1]
tree <- add.edges(tree,e)
}
## put it all together and return
if(useDB){
res <- list(graph=Gin,tree=tree,blocks=blocks,block.cohesion=block.cohesion,data=dbfile)
}else{
res <- list(graph=Gin,tree=tree,blocks=blocks,block.cohesion=block.cohesion,data=list(branches=branches,branchMembership=branchMembership,subBranches=subBranches))
}
class(res) <- "bgraph"
return(res)
}
- A small helper function to identify the cohesive ##
- parents of a given block inside cohesive.blocks. ##
find.cohesive.parents <- function(id,blockIds,subBranches){
res <- numeric()
Q <- subBranches$parentId[subBranches$childId==id]
while(length(Q)>0){
thisId <- Q[1]
Q <- Q[-1]
if(is.na(thisId)) stop("NA in subBranches. something's wrong")
if(thisId %in% blockIds){
res <- c(res,thisId)
}else{
Q <- c(Q,subBranches$parentId[subBranches$childId==thisId])
}
}
return(unique(res))
}
###################################################### ## kCutsets uses Kanevsy's algorithm to find all ## ## cutsets of size k in the given graph. ## ######################################################
kCutsets <- function(g,k=NULL,cl=NULL){
if(!is.igraph(g)){
stop("g must be an igraph object")
}
if(is.null(k)){
if(!is.connected(g)){
k <- 0
}else if(min(degree(g))<=1){
k <- 1
}else{
k <- vertex.connectivity(g)
}
}
if(k==0 | vcount(g) <=2) return(list(numeric()))
if(k==1){ # for 1-connected graphs, use RBGL's articulationPoints()
return(articulation.points(g)) ## function defined below, converts to graphNEL and calls articulationPoints()
}
#############
## begin Kanevsky's (parallel) algorithm:
#############
v <- as.numeric(V(g))
K <- v[order(degree(g),decreasing=T)][1:k] #2a
if(is.cutset(K,g)) res <- c(res,list(K))
P <- expand.grid(K,v) #2b
G <- list(g) #3
for(i in 2:nrow(P)){
Gi <- add.edges(g,as.numeric(t(P[1:(i-1),])))
}
if(is.null(cl)){
Gbar <- lapply(G,etReduction) #4 (sequential)
mflow <- unlist(lapply(1:nrow(P),function(i){graph.maxflow(Gbari,P[i,1]+vcount(g),P[i,2])}))
Gbar <- Gbar[mflow==k] #5 (sequential)
P <- P[mflow==k,]
if(nrow(P)<1) return(list())
GbarRes <- lapply(1:nrow(P),function(i){graph.residual(Gbari,P[i,1]+vcount(g),P[i,2])}) #6 (sequential)
comp <- lapply(GbarRes,clusters,mode="strong")
L <- lapply(1:length(comp),function(i){graph.shrink(GbarResi,comp=compi)})
L <- lapply(L,graph.antichains,cl=cl) #7 (sequential)
## convert antichains in L to vertex sets in g
res <- lapply(1:length(L),
function(i){
gc()
l <- Li
cmp <- compi
thisres <- list()
for(cn in l){
thisres <- unique(c(thisres,list(unique(sort((which(cmp$membership %in% cn)-1) %% vcount(g))))))
}
return(thisres)
}
)
res <- unlist(res,recursive=F)
## and see which are cutsets
res <- unique(res[unlist(lapply(res,function(x){length(x)==k & is.cutset(x,g)}))])
}else{
Gbar <- parLapply(cl,G,etReduction) #4 (parallel)
mflow <- unlist(parLapply(cl,1:nrow(P),function(i){graph.maxflow(Gbari,P[i,1]+vcount(g),P[i,2])}))
Gbar <- Gbar[mflow==k] #5 (parallel)
P <- P[mflow==k,]
if(nrow(P)<1) return(list())
GbarRes <- parLapply(cl,1:nrow(P),function(i){graph.residual(Gbari,P[i,1]+vcount(g),P[i,2])}) #6 (parallel)
comp <- parLapply(cl,GbarRes,clusters,mode="strong")
L <- parLapply(cl,1:length(comp),function(i){graph.shrink(GbarResi,comp=compi)})
tryL <- try(parLapply(cl,L,graph.antichains,cl=NULL),silent=T) #7 (parallel)
if(class(tryL)=="try-error"){ ## not sure why I need this, but Moody-White example was crashing with parLapply here
warning(paste("Parallel execution on MPI failed with message: ",tryL," Defaulted to sequential execution",sep=""))
L <- lapply(L,graph.antichains,cl=NULL)
cat("*")
}else{
L <- tryL
}
## convert antichains in L to vertex sets in g
res <- parLapply(cl,1:length(L),
function(i){
gc()
l <- Li
cmp <- compi
thisres <- list()
for(cn in l){
thisres <- unique(c(thisres,list(unique(sort((which(cmp$membership %in% cn)-1) %% vcount(g))))))
}
return(thisres)
}
)
res <- unlist(res,recursive=F)
## and see which are cutsets
res <- unique(res[unlist(parLapply(cl,res,function(x){length(x)==k & is.cutset(x,g)}))])
}
return(res)
}
graph.residual <- function(g,i,j){ ## NOT GENERIC: deletes edges once they get to nonpositive capacity
gc()
if(!("capacity" %in% list.edge.attributes(g))) E(g)$capacity <- 1
g.r <- simplify(g)
done <- F
while(!done){
p <- get.shortest.paths(g.r,i,j,mode="out")1
if(length(p)<1) done <- T
E(g.r,path=p)$capacity <- E(g.r,path=p)$capacity - 1
g.r <- delete.edges(g.r,E(g.r)[capacity<=0])
if(vertex.disjoint.paths(g.r,i,j)==0) done <- T
}
return(g.r)
}
graph.shrink <- function(g,comp=clusters(g,mode="strong"),remove.multiple=T,remove.loops=T){
gc()
res <- graph.empty(length(comp$csize),directed=T)
el <- get.edgelist(g)
el.comp <- rbind(comp$membership[el[,1]+1],comp$membership[el[,2]+1])
res <- add.edges(res,el.comp)
res <- simplify(res,remove.multiple=remove.multiple,remove.loops=remove.loops)
return(res)
}
graph.antichains <- function(g,cl=NULL){ ## g must be a directed acyclic graph
#############################################################################
###################################################### ## Examples of use for cohesive blocking. ## ######################################################
##### # 1 # Create a graph to work on: #####
## reconstruct the example from Moody, White (2003)
mwExample <- graph.empty(23,directed=F)
V(mwExample)$label <- 1:23
mwExample <- add.edges(mwExample,
c(1,2,
1,3,
1,4,
1,5,
1,6,
2,3,
2,4,
2,5,
2,7,
3,4,
3,6,
3,7,
4,5,
4,6,
4,7,
5,6,
5,7,
5,21,
6,7,
7,8,
7,11,
7,14,
7,19,
8,9,
8,11,
8,14,
9,10,
10,12,
10,13,
11,12,
11,14,
12,16,
13,16,
14,15,
15,16,
17,18,
17,19,
17,20,
18,20,
18,21,
19,20,
19,22,
19,23,
20,21,
21,22,
21,23,
22,23
)-1)
mw <- mwExample
mwExampleFixed <- add.edges(mwExample,c(19,21)) ## add an edge to make 3-component of {17-23} as in Moody-White
mw2 <- mwExampleFixed
##### # 2 # Run cohesive blocking algorithm: #####
mwBlocks <- cohesive.blocks(mw) mwBlocks2 <- cohesive.blocks(mw2)
##### # 3 # Plot the results: #####
plot(mwBlocks) # if a layout algorithm takes a long time for a # particular graph, it can be saved to the object: mwBlocks2 <- add.kk.layout(mwBlocks2) ## kamada kawai plot(mwBlocks2) mwBlocks2 <- add.svd.layout(mwBlocks2,s=1) ## single value decomposition plot(mwBlocks2)
##### # 4 # Save as Pajek files: #####
write.pajek.bgraph(mwBlocks,file="mwExample1") write.pajek.bgraph(mwBlocks2,file="mwExample2")
