Weeks 6 (Roles), 7 (Community), 9 (Network Tests)
All previous descriptives available here.
For the purposes of this analysis, I will be focusing only on Book 4, The Goblet of Fire.
#Set my wd to my own path to the data
setwd("~/DACCS R/Networks/HP Data/bossaert_meidert_harrypotter")
# Read data
book4 <- as.matrix(read.table("hpbook4.txt"))
hp.attributes <- as.matrix(read.table("hpattributes.txt", header=TRUE))
hp.name <- as.matrix(read.table("hpnames.txt", header=TRUE))
Next, I need to create network objects.
book4.ig <- graph.adjacency(book4)
book4.stat <- as.network.matrix(book4)
Remove the loops in the creation of the igraph object:
book4a.ig <- graph.adjacency(book4, diag = 0)
#plot(book4a.ig)
simple4ege <- get.edgelist(book4a.ig)
simple4a.ig <- graph.edgelist(simple4ege)
#plot(simple4a.ig)
Okay, so I’m going to try and replace the column names:
colnames(work4)
[1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11"
[12] "V12" "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20" "V21" "V22"
[23] "V23" "V24" "V25" "V26" "V27" "V28" "V29" "V30" "V31" "V32" "V33"
[34] "V34" "V35" "V36" "V37" "V38" "V39" "V40" "V41" "V42" "V43" "V44"
[45] "V45" "V46" "V47" "V48" "V49" "V50" "V51" "V52" "V53" "V54" "V55"
[56] "V56" "V57" "V58" "V59" "V60" "V61" "V62" "V63" "V64"
as.vector(names)
# A tibble: 1 × 64
` 1` ` 2` ` 3` ` 4` ` 5` ` 6` ` 7` ` 8` ` 9` `10` `11`
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 Adrian … Alic… Ange… Anth… Blai… C. W… Cedr… Cho … Coli… Corm… Dean…
# … with 53 more variables: `12` <chr>, `13` <chr>, `14` <chr>,
# `15` <chr>, `16` <chr>, `17` <chr>, `18` <chr>, `19` <chr>,
# `20` <chr>, `21` <chr>, `22` <chr>, `23` <chr>, `24` <chr>,
# `25` <chr>, `26` <chr>, `27` <chr>, `28` <chr>, `29` <chr>,
# `30` <chr>, `31` <chr>, `32` <chr>, `33` <chr>, `34` <chr>,
# `35` <chr>, `36` <chr>, `37` <chr>, `38` <chr>, `39` <chr>,
# `40` <chr>, `41` <chr>, `42` <chr>, `43` <chr>, `44` <chr>, …
[1] "Adrian Pucey" "Alicia Spinnet"
[3] "Angelina Johnson" "Anthony Goldstein"
[5] "Blaise Zabini" "C. Warrington"
[7] "Cedric Diggory" "Cho Chang"
[9] "Colin Creevey" "Cormac McLaggen"
[11] "Dean Thomas" "Demelza Robins"
[13] "Dennis Creevey" "Draco Malfoy"
[15] "Eddie Carmichael" "Eleanor Branstone"
[17] "Ernie Macmillan" "Euan Abercrombie"
[19] "Fred Weasley" "George Weasley"
[21] "Ginny Weasley" "Graham Pritchard"
[23] "Gregory Goyle" "Hannah Abbott"
[25] "Harry James Potter" "Hermione Granger"
[27] "Jimmy Peakes" "Justin Finch-Fletchley"
[29] "Katie Bell" "Kevin Whitby"
[31] "Lavender Brown" "Leanne"
[33] "Lee Jordan" "Lucian Bole"
[35] "Luna Lovegood" "Malcolm Baddock"
[37] "Mandy Brocklehurst" "Marcus Belby"
[39] "Marcus Flint" "Michael Corner"
[41] "Miles Bletchley" "Millicent Bulstrode"
[43] "Natalie McDonald" "Neville Longbottom"
[45] "Oliver Wood" "Orla Quirke"
[47] "Owen Cauldwell" "Padma Patil"
[49] "Pansy Parkinson" "Parvati Patil"
[51] "Penelope Clearwater" "Percy Weasley"
[53] "Peregrine Derrick" "Roger Davies"
[55] "Romilda Vane" "Ronald Weasley"
[57] "Rose Zeller" "Seamus Finnigan"
[59] "Stewart Ackerley" "Susan Bones"
[61] "Terry Boot" "Theodore Nott"
[63] "Vincent Crabbe" "Zacharias Smith"
HOLY CRAP I THINK I DID IT
#work4 is our working dataframe - convert to a matrix
work4 <- as.matrix(work4)
#convert to graph object
work4.ig <- graph.adjacency(work4, diag = 0)
#now we're going to get only the connected part of the graph
work4edge <- get.edgelist(work4.ig)
work4_simple.ig <- graph.edgelist(work4edge)
Plot:
ggraph(work4_simple.ig, layout = 'lgl') +
geom_edge_arc(color="gray", strength=0.3, arrow = arrow(length = unit(4, 'mm'))) +
geom_node_point() +
geom_node_text(aes(label = name), size=3, repel=T) +
theme_void() +
labs(title = "Student Support Network in Harry Potter and the Goblet of Fire")
book4a.ig <- graph.adjacency(book4, diag = 0)
#First attempt - guessing I actually already did this, oh well!
work4_adjacency<-as.matrix(as_adjacency_matrix(work4.ig))
old4 <- work4_adjacency
old4.ig <- graph.adjacency(old4)
class(old4.ig)
[1] "igraph"
new4.ig <- igraph::delete.vertices(old4.ig, which(igraph::degree(old4.ig)==0))
#plot(new4.ig)
Create Nodes Dataframe
Note to self: use new4.ig, new4.stat
#Create nodes df for new and add all degree
#Confirm degree is numeric
new4.nodes<-data.frame(name=V(new4.ig)$name, degree=igraph::degree(new4.ig))
#add in degree and out degree
new4.nodes <- new4.nodes %>%
mutate(indegree=igraph::degree(new4.ig, mode="in", loops=FALSE),
outdegree=igraph::degree(new4.ig, mode="out", loops=FALSE))
#add eigenCentrality
newEigen <- centr_eigen(new4.ig, directed = T)
newEigen$vector
[1] 2.460367e-01 6.053405e-02 4.724799e-17 3.924982e-17 9.394659e-01
[6] 9.394659e-01 2.311431e-01 1.000000e+00 9.394659e-01 7.083229e-01
[11] 4.650211e-17 9.394659e-01
newEigen$centralization
[1] 0.5451
new4.eigen<-cbind(newEigen$vector,V(new4.ig)$name)
new4.eigenDF <- data_frame(new4.eigen)
#add to our nodes
new4.nodes <- new4.nodes %>%
mutate(eigenCentrality = new4.eigen)
Calculate derived and reflected centrality and add to nodes df
old4_adj <- as.matrix(graph.adjacency(old4))
new4_adj <- as.matrix(as_adjacency_matrix(new4.ig))
old4.stat <- as.network.matrix(old4_adj)
new4.stat <- as.network.matrix(new4_adj)
print(new4.stat)
Network attributes:
vertices = 12
directed = TRUE
hyper = FALSE
loops = FALSE
multiple = FALSE
bipartite = FALSE
total edges= 34
missing edges= 0
non-missing edges= 34
Vertex attribute names:
vertex.names
No edge attributes
#Matrix to 2nd power for two step derived/reflected centrality measurement.
new4_adjacency2<-new4_adj %*% new4_adj
#Calculate the proportion of reflected centrality.
new_rc<-diag(as.matrix(new4_adjacency2))/rowSums(as.matrix(new4_adjacency2))
new_rc<-ifelse(is.nan(new_rc),0,new_rc)
head(new_rc)
Cedric Diggory Cho Chang Colin Creevey Dennis Creevey
0.1428571 0.0000000 0.1250000 0.1250000
Fred Weasley George Weasley
0.2000000 0.1904762
#Calculate the proportion of derived centrality.
new_dc<-1-diag(as.matrix(new4_adjacency2))/rowSums(as.matrix(new4_adjacency2))
new_dc<-ifelse(is.nan(new_dc),1,new_dc)
head(new_dc)
Cedric Diggory Cho Chang Colin Creevey Dennis Creevey
0.8571429 1.0000000 0.8750000 0.8750000
Fred Weasley George Weasley
0.8000000 0.8095238
Now we’ll add in our closesness, betweeness and brokerage scores:
#calculate closeness centrality: igraph
closeIG <- igraph::closeness(new4.ig)
head(igraph::closeness(new4.ig))
Cedric Diggory Cho Chang Colin Creevey Dennis Creevey
0.06666667 0.06250000 0.05000000 0.05000000
Fred Weasley George Weasley
0.08333333 0.07692308
[1] 0 0 0 0 0 0
closeIG_df <- data_frame(closeIG)
new4.nodes <- new4.nodes %>%
mutate(closeness = closeIG_df$closeIG)
#Betweeness Centrality
head(igraph::betweenness(new4.ig, directed=TRUE, weights=NA))
Cedric Diggory Cho Chang Colin Creevey Dennis Creevey
8 0 0 0
Fred Weasley George Weasley
9 0
betweenIG <- igraph::betweenness(new4.ig, directed=TRUE, weights=NA)
betweenIG_df <- data_frame(betweenIG)
new4.nodes <- new4.nodes %>%
mutate(between = betweenIG_df$betweenIG)
Adding constraint
constraintIG <- constraint(new4.ig)
constraintIG_df <- data_frame(constraintIG)
new4.nodes <- new4.nodes %>%
mutate(constraint = constraintIG_df$constraintIG)
Brokerage:
#return matrix of standardized brokerage scores
head(brokerage(new4.stat, cl = new4.nodes$name)$z.nli)
w_I w_O b_IO b_OI b_O t
Cedric Diggory NaN NaN NaN NaN -1.0136651 -1.0136651
Cho Chang NaN NaN NaN NaN -1.2430947 -1.2430947
Colin Creevey NaN NaN NaN NaN -1.2430947 -1.2430947
Dennis Creevey NaN NaN NaN NaN -1.2430947 -1.2430947
Fred Weasley NaN NaN NaN NaN -0.3253764 -0.3253764
George Weasley NaN NaN NaN NaN -1.2430947 -1.2430947
Quick Reminder of the types of brokerage:
b_O: Liaison role; the broker mediates contact between two individuals from different groups, neither of which is the group to which he or she belongs. Two-path structure: A -> B -> C
In this evaluation, liaison and total brokerage are the same, so we’ll add it to the nodes df as liaison/total
brokerage <- brokerage(new4.stat, cl = new4.nodes$name)$z.nli
brokerage_df <- as.data.frame(brokerage)
new4.nodes <- new4.nodes %>%
mutate(liaison_total = brokerage_df$b_O)
##Calculate Structural Equivalence
In calculating structural equivalence, we are interested in identifying nodes that have the same pattern of ties with the same neighbors. The statnet function equiv.clust() compresses the first two steps of role detection–creating a distance (similarity) matrix and then clustering the nodes on the basis of that matrix–into a single function. The distance matrix is calculated based on the option method= and can be set to different distance functions such as hamming, correlation, hamming, or gamma when using structural equivalence equiv.fun="sedist" as the basis for clustering.
#calculate equivalence from specified distance marix
hp.se<-equiv.clust(new4.stat, equiv.fun="sedist", method="hamming",mode="graph")
summary(hp.se)
Length Class Mode
cluster 7 hclust list
metric 1 -none- character
equiv.fun 1 -none- character
cluster.method 1 -none- character
glabels 12 -none- character
plabels 12 -none- character
#plot equivalence clustering
plot(hp.se,labels=hp.se$glabels)
#with average cluster.method
hp.avg.se<-equiv.clust(new4.stat, equiv.fun="sedist", cluster.method="average", method="hamming",mode="graph")
#plot:
plot(hp.avg.se,labels=hp.se$glabels)
And single:
#with single cluster.method
hp.sing.se<-equiv.clust(new4.stat, equiv.fun="sedist", cluster.method="single", method="hamming",mode="graph")
#plot:
plot(hp.sing.se,labels=hp.se$glabels)
And ward.D:
#with ward.D cluster.method
hp.wrd.se<-equiv.clust(new4.stat, equiv.fun="sedist", cluster.method="ward.D", method="hamming",mode="graph")
#plot:
plot(hp.wrd.se,labels=hp.se$glabels)
Partition a Matrix Using Clustering
#plot equivalence clustering
plot(hp.se,labels=hp.se$glabels)
#partition the clusters
rect.hclust(hp.se$cluster,h=15)
Discussion
This is the most interesting visualization to me, and makes sense within the context of the plot of Book 4, though I am surprised that Cedric doesn’t cluster more closely with Harry. Perhaps because this is an unweighted network - so it’s not about how much support one character offers another, it’s about whether or not they offer support at all. Other than that, this split does make sense to me. Fred, George, Ron, Hermione and Harry all interact in various dyads, while the other cluster is more peripheral to them, and also does not offer support to each other the way that the main characters do.
#plot equivalence clustering
plot(hp.se,labels=hp.se$glabels)
#partition the clusters
rect.hclust(hp.se$cluster,h=5)
To implement a blockmodel in statnet, we must specify the original network, the equivalence clustering object to use, and the number of partitions. The option k= in the blockmodel command indicates how many partitions to use in creating the blockmodel. Alternatively, the option h= indicates the height at which to cut the dendrogram.
#blockmodel and select partitions
blk_mod<-blockmodel(new4.stat,hp.se,k=2)
#print blockmodel object
blk_mod
Network Blockmodel:
Block membership:
Cedric Diggory Cho Chang Colin Creevey
1 1 1
Dennis Creevey Fred Weasley George Weasley
1 2 2
Ginny Weasley Harry James Potter Hermione Granger
1 2 2
Neville Longbottom Percy Weasley Ronald Weasley
1 1 2
Reduced form blockmodel:
Cedric Diggory Cho Chang Colin Creevey Dennis Creevey Fred Weasley George Weasley Ginny Weasley Harry James Potter Hermione Granger Neville Longbottom Percy Weasley Ronald Weasley
Block 1 Block 2
Block 1 0.07142857 0.1714286
Block 2 0.14285714 1.0000000
Plot blockmodel
plot.block<-function(x=blk_mod, main=NULL, cex.lab=1){
plot.sociomatrix(x$blocked.data, labels=list(x$plabels,x$plabels),
main=main, drawlines = FALSE, cex.lab=cex.lab)
for (j in 2:length(x$plabels)) if (x$block.membership[j] !=
x$block.membership[j-1])
abline(v = j - 0.5, h = j - 0.5, lty = 3, xpd=FALSE)
}
#blockmodel and select partitions
blk_mod<-blockmodel(new4.stat,hp.se,k=2)
#plot partitions
plot.block(blk_mod,main="Harry Potter Support Network: 2 Partitions", cex.lab=.5)
plot the 5 partitions isolated in the earlier exercise.
#blockmodel and select partitions
blk_mod5<-blockmodel(new4.stat,hp.se,k=5)
#print blockmodel object
blk_mod5$block.model
Block 1 Block 2 Block 3 Block 4 Block 5
Block 1 0.15 0.05 0.00 1 0.0
Block 2 0.00 1.00 0.25 1 0.5
Block 3 0.00 0.00 NaN 0 0.0
Block 4 0.20 1.00 0.00 NaN 1.0
Block 5 0.00 0.00 0.00 0 NaN
#plot blockmodel partitions
plot.block(blk_mod5,main="Harry Potter Support Network, 5 Partitions", cex.lab=.5)
Because the support network is so small, I do not think it makes sense to have more than 2 blocks.
Plot Network Roles
When we create a blockmodel object, the component block.membership is used to tell us which role (or block) each node has been assigned. It can be useful to take this information and superimpose it on a traditional network plot. To do this, we will first assign blk_mod$block.membership as a vertex attribute. Because of the fact that this network is so small, I will be using a k =2.
#blockmodel and select partitions
blk_modRoles<-blockmodel(new4.stat,hp.se,k=2)
#assign block membership to vertex attribute
V(new4.ig)$role<-blk_mod$block.membership[match(V(new4.ig)$name,blk_mod$plabels)]
#plot network using "role" to color nodes: statnet
GGally::ggnet2(new4.stat,
node.color="role",
node.size=degree(new4.stat, gmode="graph"),
node.label = "vertex.names",
node.alpha = .5)
#plot network using "role" to color nodes: igraph
plot.igraph(new4.ig,
vertex.color=V(new4.ig)$role,
vertex.size=8+(igraph::degree(new4.ig)*4))
Centrality by Network Role
###Repeat the blockmodeling and creating the vertex attribute to be able to attach to the .nodes element.
#blockmodel and select partitions
blk_mod<-blockmodel(new4.stat, hp.se, k=2)
#assign block membership to vertex attribute
V(new4.ig)$role<-blk_mod$block.membership[match(V(new4.ig)$name,blk_mod$plabels)]
#see part 3 of basic network structure to create a .nodes dataframe
#attach role to .nodes dataframe
new4.nodes$role<-V(new4.ig)$role
#summarize various network statistics by role
new4.nodes%>%
select(-name)%>% group_by(role)%>%
mutate(n=n())%>%
summarise_all(mean, na.rm=TRUE)%>%
as.matrix()%>%
print(digits=2)
role degree indegree outdegree eigenCentrality derCentrality
[1,] 1 2.4 1.1 1.3 NA 0.94
[2,] 2 10.2 5.2 5.0 NA 0.79
reflectedCentrality closeness between constraint liaison_total n
[1,] 0.056 0.057 1.1 0.82 -1.21 7
[2,] 0.206 0.085 11.0 0.52 0.87 5
Regular Equivalency - unweighted network
#calculate equivalence from specified distance matrix
hp.re<-equiv.clust(new4.stat, equiv.fun="redist", method="catrege", mode="graph")
#plot equivalence clustering
plot(hp.re,labels=hp.se$glabels)
Hmm. They all have the exact same position? As the tutorial suggests, this is pretty deeply not interesting. I think the clustering and blockmodeling is much more interesting!
fast and greedy
#Run clustering algorithm: fast_greedy
new4U.ig <- as.undirected(new4.ig)
comm.fg<-cluster_fast_greedy(new4U.ig)
#Inspect clustering object
names(comm.fg)
[1] "merges" "modularity" "membership" "names" "algorithm"
[6] "vcount"
comm.fg
IGRAPH clustering fast greedy, groups: 2, mod: 0.21
+ groups:
$`1`
[1] "Fred Weasley" "George Weasley" "Ginny Weasley"
[4] "Hermione Granger" "Neville Longbottom" "Percy Weasley"
[7] "Ronald Weasley"
$`2`
[1] "Cedric Diggory" "Cho Chang" "Colin Creevey"
[4] "Dennis Creevey" "Harry James Potter"
This is really interesting in contrast to our block models.
In order to see a complete list of which nodes belong to which clusters, we can easily retrieve this information using groups.
#retrieve list of nodes in communities
igraph::groups(comm.fg)
$`1`
[1] "Fred Weasley" "George Weasley" "Ginny Weasley"
[4] "Hermione Granger" "Neville Longbottom" "Percy Weasley"
[7] "Ronald Weasley"
$`2`
[1] "Cedric Diggory" "Cho Chang" "Colin Creevey"
[4] "Dennis Creevey" "Harry James Potter"
block model with community density
#blockmodel with community membership
blockmodel(new4.stat,comm.fg$membership)
Network Blockmodel:
Block membership:
1 2 3 4 5 6 7 8 9 10 11 12
2 2 2 2 1 1 1 2 1 1 1 1
Reduced form blockmodel:
1 2 3 4 5 6 7 8 9 10 11 12
Block 1 Block 2
Block 1 0.3809524 0.1428571
Block 2 0.1428571 0.4000000
#only retrieve block density object
blockmodel(new4.stat,comm.fg$membership)$block.model
Block 1 Block 2
Block 1 0.3809524 0.1428571
Block 2 0.1428571 0.4000000
#print block densities using only 2 digits for readability
print(blockmodel(new4.stat,comm.fg$membership)$block.model, digits=2)
Block 1 Block 2
Block 1 0.38 0.14
Block 2 0.14 0.40
Adding Community Membership to Node Info
#add community membership as a node attribute
new4.nodes$comm.fg<-comm.fg$membership
#summarize node statistics by community
new4.nodes%>%
select(-name)%>% group_by(comm.fg)%>%
mutate(n=n())%>%
summarise_all(mean, na.rm=TRUE)%>%
as.matrix()%>%
print(digits=2)
comm.fg degree indegree outdegree eigenCentrality derCentrality
[1,] 1 6.0 3.0 3.0 NA 0.89
[2,] 2 5.2 2.6 2.6 NA 0.87
reflectedCentrality closeness between constraint liaison_total
[1,] 0.11 0.076 1.8 0.65 -0.85
[2,] 0.13 0.066 10.0 0.75 0.36
role n
[1,] 1.6 7
[2,] 1.2 5
Okay, the thing that’s interesting again is how different the communities are from the blocks!
Plot Network with Community Coloring
#plot network with community coloring
plot(comm.fg,new4.ig)
Walktrap
#Run & inspect clustering algorithm: 10 steps
igraph::groups(walktrap.community(new4.ig, steps=10))
$`1`
[1] "Fred Weasley" "George Weasley" "Ginny Weasley"
[4] "Hermione Granger" "Neville Longbottom" "Percy Weasley"
[7] "Ronald Weasley"
$`2`
[1] "Cedric Diggory" "Cho Chang" "Harry James Potter"
$`3`
[1] "Colin Creevey" "Dennis Creevey"
#Run & inspect clustering algorithm: 20 steps
igraph::groups(walktrap.community(new4.ig ,steps=20))
$`1`
[1] "Fred Weasley" "George Weasley" "Ginny Weasley"
[4] "Hermione Granger" "Neville Longbottom" "Percy Weasley"
[7] "Ronald Weasley"
$`2`
[1] "Cedric Diggory" "Cho Chang" "Harry James Potter"
$`3`
[1] "Colin Creevey" "Dennis Creevey"
#Run & inspect clustering algorithm
igraph::groups(walktrap.community(new4.ig, steps=100))
$`1`
[1] "Cedric Diggory" "Cho Chang" "Colin Creevey"
[4] "Dennis Creevey" "Harry James Potter"
$`2`
[1] "Fred Weasley" "George Weasley" "Ginny Weasley"
[4] "Hermione Granger" "Neville Longbottom" "Percy Weasley"
[7] "Ronald Weasley"
Why does 100 steps give us 2 communities but the other two give us 3, identical communities?
Why is Harry in a community with Cedric and Cho? But in blocks with Ron, Hermione, Fred and George?
inspect nodes
``{r} #inspect density of between/within community ties print(blockmodel(new4.stat,comm.wt\(membership)\)block.model, digits=2) #add community membership as a vertex attribute new4.nodes\(comm.wt<-comm.wt\)membership #summarize node statistics by community nodes.by.gp(new4.nodes,“comm.wt”)
Note: this code doesn't seem to work for me here, so I'm going to uncode the chunk.
``{r}
#plot network with community coloring
plot(comm.wt,new4.ig)
[Also failing - so I’m going to uncode the chunk]
``{r}
#compare community partition modularity scores modularity(comm.fg) modularity(comm.wt)
At this point, I'm going to move on to Week 9 because all the tutorial code is failing.
* going to close and submit this for 6 & 7 since no new code chunks will run and I have no idea why??
```{.r .distill-force-highlighting-css}