library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(readr)
library(threejs)
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
library(knitr)
library(knitLatex)
# Load the network data: for each of the seven episodes an edge list
# (`chapterNEdges.csv`) and a vertex table (`chapterNVertices.csv`), followed
# by the combined files for all episodes (`Allchapter*.csv`).
# Lines starting with `## ` are readr's console output captured in the
# rendered report. They show the parsed columns: edge files have `source`,
# `target` and an integer `value`; vertex files have `source`, a numeric
# `appearance` and a `color` string.
dataEdge1 <- read_csv('chapter1Edges.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## target = col_character(),
## value = col_integer()
## )
dataVertices1 <- read_csv('chapter1Vertices.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## appearance = col_double(),
## color = col_character()
## )
dataEdge2 <- read_csv('chapter2Edges.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## target = col_character(),
## value = col_integer()
## )
dataVertices2 <- read_csv('chapter2Vertices.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## appearance = col_double(),
## color = col_character()
## )
dataEdge3 <- read_csv('chapter3Edges.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## target = col_character(),
## value = col_integer()
## )
dataVertices3 <- read_csv('chapter3Vertices.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## appearance = col_double(),
## color = col_character()
## )
dataEdge4 <- read_csv('chapter4Edges.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## target = col_character(),
## value = col_integer()
## )
dataVertices4 <- read_csv('chapter4Vertices.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## appearance = col_double(),
## color = col_character()
## )
dataEdge5 <- read_csv('chapter5Edges.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## target = col_character(),
## value = col_integer()
## )
dataVertices5 <- read_csv('chapter5Vertices.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## appearance = col_double(),
## color = col_character()
## )
dataEdge6 <- read_csv('chapter6Edges.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## target = col_character(),
## value = col_integer()
## )
dataVertices6 <- read_csv('chapter6Vertices.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## appearance = col_double(),
## color = col_character()
## )
dataEdge7 <- read_csv('chapter7Edges.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## target = col_character(),
## value = col_integer()
## )
dataVertices7 <- read_csv('chapter7Vertices.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## appearance = col_double(),
## color = col_character()
## )
dataEdgeAll <- read_csv('AllchapterEdges.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## target = col_character(),
## value = col_integer()
## )
dataVerticesAll <- read_csv('AllchapterVertices.csv')
## Parsed with column specification:
## cols(
## source = col_character(),
## appearance = col_double(),
## color = col_character()
## )
Add information about each character: the number of screen appearances and the number of conversations.
# Attach conversation totals and a one-line text label to a vertex table.
#
# For every character, sums the edge `value` over all rows of `df_edg` in which
# the character appears as `source` or as `target`, joins that total onto
# `df_ver`, and builds a text summary used as a vertex label.
#
# Arguments:
#   df_ver  vertex table with a `source` column (character name) and an
#           `appearance` column; other columns (e.g. `color`) are carried along
#   df_edg  edge table with `source`, `target` and a numeric `value`
#
# Returns a data frame sorted by `source` with columns `source`, `total_edges`,
# the remaining columns of `df_ver`, and `all_info`. Characters present in only
# one of the two inputs are kept: their missing numeric fields become 0, while
# missing non-numeric fields (such as `color`) stay NA instead of turning
# into the string "0".
info_detail <- function(df_ver, df_edg) {
  stopifnot(
    all(c("source", "target", "value") %in% names(df_edg)),
    "source" %in% names(df_ver)
  )

  # A character's total is the sum of `value` over every edge it touches, so
  # stack both endpoints and aggregate once. Missing weights count as 0, and
  # summing as double avoids integer overflow.
  endpoint <- factor(c(
    as.character(df_edg[["source"]]),
    as.character(df_edg[["target"]])
  ))
  weight <- as.numeric(rep(df_edg[["value"]], times = 2L))
  totals <- vapply(split(weight, endpoint), sum, numeric(1), na.rm = TRUE)
  edg_tot <- data.frame(
    source = levels(endpoint),
    total_edges = unname(totals),
    stringsAsFactors = FALSE
  )

  # Full outer join: keep characters that have no edges as well as characters
  # that are missing from the vertex table.
  final <- merge(edg_tot, df_ver, by = "source", all = TRUE)

  # Only numeric gaps (no conversations / no recorded appearances) mean "zero".
  is_num <- vapply(final, is.numeric, logical(1))
  final[is_num] <- lapply(
    final[is_num],
    function(col) replace(col, is.na(col), 0)
  )

  # paste() on zero rows would still return one string, which cannot be
  # assigned to an empty data frame.
  final$all_info <- if (nrow(final) > 0L) {
    paste(
      final$source, ", # conversation -", final$total_edges,
      ", # appearance -", final$appearance,
      sep = " "
    )
  } else {
    character(0)
  }
  final
}
# For each episode (and for all episodes combined): enrich the vertex table
# with conversation totals and labels, then build the undirected graph from
# the edge list and the enriched vertices.

# Episode 1
dataVertices1 <- info_detail(df_ver = dataVertices1, df_edg = dataEdge1)
g1 <- graph_from_data_frame(dataEdge1, directed = FALSE, vertices = dataVertices1)

# Episode 2
dataVertices2 <- info_detail(df_ver = dataVertices2, df_edg = dataEdge2)
g2 <- graph_from_data_frame(dataEdge2, directed = FALSE, vertices = dataVertices2)

# Episode 3
dataVertices3 <- info_detail(df_ver = dataVertices3, df_edg = dataEdge3)
g3 <- graph_from_data_frame(dataEdge3, directed = FALSE, vertices = dataVertices3)

# Episode 4
dataVertices4 <- info_detail(df_ver = dataVertices4, df_edg = dataEdge4)
g4 <- graph_from_data_frame(dataEdge4, directed = FALSE, vertices = dataVertices4)

# Episode 5
dataVertices5 <- info_detail(df_ver = dataVertices5, df_edg = dataEdge5)
g5 <- graph_from_data_frame(dataEdge5, directed = FALSE, vertices = dataVertices5)

# Episode 6
dataVertices6 <- info_detail(df_ver = dataVertices6, df_edg = dataEdge6)
g6 <- graph_from_data_frame(dataEdge6, directed = FALSE, vertices = dataVertices6)

# Episode 7
dataVertices7 <- info_detail(df_ver = dataVertices7, df_edg = dataEdge7)
g7 <- graph_from_data_frame(dataEdge7, directed = FALSE, vertices = dataVertices7)

# Episodes 1 to 7 combined
dataVerticesAll <- info_detail(df_ver = dataVerticesAll, df_edg = dataEdgeAll)
gAll <- graph_from_data_frame(dataEdgeAll, directed = FALSE, vertices = dataVerticesAll)
# Draw one conversation network on a circular layout.
#
# Arguments:
#   g            igraph graph with `appearance` and `color` vertex attributes
#                and a `value` edge attribute (as built by
#                graph_from_data_frame() from the vertex and edge tables)
#   title        plot title
#   vertex_size  vertex sizes; default 0.7 x `appearance`
#   edge_width   edge widths; default 0.5 x `value`
#
# Uses layout_in_circle(), the current name of the deprecated layout.circle().
plot_circle_network <- function(g, title,
                                vertex_size = 0.7 * V(g)$appearance,
                                edge_width = 0.5 * E(g)$value) {
  plot(
    g,
    layout = layout_in_circle(g),
    vertex.size = vertex_size,
    vertex.color = V(g)$color,
    vertex.shape = "sphere",
    vertex.frame.color = "tomato",
    vertex.label.cex = 0.6,
    vertex.label.dist = 4,
    edge.width = edge_width,
    edge.color = "orange",
    edge.curved = 0.25,
    rescale = TRUE,
    frame = TRUE,
    main = title
  )
}

# One static plot per episode.
plot_circle_network(g1, "StarWars Episode 1: The Phantom Menace")
plot_circle_network(g2, "StarWars Episode 2: Attack of the Clones")
plot_circle_network(g3, "StarWars Episode 3: Revenge of the Sith")
plot_circle_network(g4, "StarWars Episode 4: A New Hope")
plot_circle_network(g5, "StarWars Episode 5: The Empire Strikes Back")
plot_circle_network(g6, "StarWars Episode 6: Return of the Jedi")
plot_circle_network(g7, "StarWars Episode 7: The force Awakens")

# Episodes 1 to 7 combined: smaller scale factors for vertices and edges.
plot_circle_network(
  gAll,
  "StarWars Episodes 1 to 7",
  vertex_size = 0.3 * V(gAll)$appearance,
  edge_width = 0.5 * E(gAll)$value / 5
)
# Main characters of episodes 1 to 7 combined: at least 20 conversations and
# at least 20 appearances, ordered by conversation count (highest first).
df <- dataVerticesAll[order(-dataVerticesAll$total_edges), ]
df <- df[df$total_edges >= 20, ]
df <- df[df$appearance >= 20, ]

# Edges whose two endpoints are both main characters, duplicate rows dropped.
# graph_from_data_frame() needs every edge endpoint to be listed in `vertices`.
nam <- df$source
total_rec <- unique(subset(dataEdgeAll, source %in% nam & target %in% nam))

# Graph restricted to the main characters.
g30 <- graph_from_data_frame(d = total_rec, vertices = df, directed = FALSE)

# Static plot on a circular layout (layout_in_circle() replaces the deprecated
# layout.circle()).
plot(
  g30,
  layout = layout_in_circle(g30),
  vertex.size = 0.3 * V(g30)$appearance,
  vertex.color = V(g30)$color,
  vertex.shape = "sphere",
  vertex.frame.color = "tomato",
  vertex.label.cex = 0.6,
  vertex.label.dist = 4,
  edge.width = 0.5 * E(g30)$value / 4,
  edge.color = "orange",
  edge.curved = 0.25,
  rescale = TRUE,
  frame = TRUE,
  main = "StarWars Episodes 1 to 7"
)
# Interactive 3D views (threejs), both laid out on a sphere:
# first the filtered network held in `g30`, then the full Episode 1 network.
graphjs(
  g30,
  layout = layout_on_sphere,
  main = "StarWars Episodes 1 to 7 (cleaned)",
  vertex.size = 0.1 * V(g30)$appearance / 2,
  vertex.color = V(g30)$color,
  vertex.shape = "sphere",
  vertex.label = V(g30)$all_info,
  edge.color = "gold",
  edge.curved = 0.25,
  brush = TRUE
)
graphjs(
  g1,
  layout = layout_on_sphere,
  main = "StarWars Episode 1: The Phantom Menace",
  vertex.size = 0.2 * dataVertices1$appearance,
  vertex.shape = "sphere",
  vertex.label = dataVertices1$all_info,
  showLabels = TRUE,
  edge.color = "orange",
  edge.size = dataEdge1$value,
  edge.curved = 0.2,
  rescale = FALSE,
  brush = TRUE
)
# Community detection on the Episode 2 network based on edge betweenness.
# cluster_edge_betweenness() is the current name of the deprecated
# edge.betweenness.community().
abc <- cluster_edge_betweenness(g2)
# Number of characters in each detected community
sizes(abc)
## Community sizes
## 1 2 3 4 5 6 7 8
## 12 11 4 1 1 1 1 1
# Community id assigned to each character
membership(abc)
## ANAKIN BAIL ORGANA BERU BOBA FETT
## 1 2 1 3
## C-3PO CAPTAIN TYPHO CLIEGG COUNT DOOKU
## 1 2 1 2
## EMPEROR JANGO FETT JAR JAR JOBAL
## 2 3 2 1
## KI-ADI-MUNDI LAMA SU MACE WINDU NUTE GUNRAY
## 2 3 2 1
## OBI-WAN ORN FREE TAA OWEN PADME
## 2 2 1 1
## PK-4 PLO KOON POGGLE RUWEE
## 4 5 1 1
## SENATOR ASK AAK SHMI SIO BIBBLE SOLA
## 2 6 7 1
## SUN RIT TAUN WE WATTO YODA
## 1 3 8 2
# One fill colour per community, shared by the static plot, its legend and the
# 3D view. Without an explicit `col`, plot() colours the vertices from igraph's
# default palette, so the legend would describe colours that are not drawn.
# Only eight colours are defined; any further community would get NA.
community_cols <- c("yellow", "red", "blue", "green", "brown", "skyblue", "magenta", "orange")
mem <- membership(abc)
# length() of a communities object is the number of communities.
n_comm <- length(abc)
plot(
  abc, g2,
  col = community_cols[mem],
  main = 'Community detection: Edge.betweenness',
  vertex.shape = "sphere"
)
legend(
  x = -1.5, y = -1.1,
  legend = paste("Community", seq_len(n_comm)),
  pch = 21,
  pt.bg = community_cols[seq_len(n_comm)],
  pt.cex = 2, cex = .8, ncol = 2
)
# Store the community colour as a vertex attribute and reuse it in the 3D view.
gc <- set_vertex_attr(g2, "color1", value = community_cols[mem])
graphjs(
  gc,
  vertex.label = V(gc)$all_info,
  vertex.size = 3,
  vertex.label.cex = 0.4,
  vertex.color = V(gc)$color1,
  rescale = FALSE,
  brush = TRUE,
  main = 'Community detection: Edge.betweenness',
  vertex.shape = "sphere"
)
# Community detection on the Episode 2 network with the fast-greedy method.
# cluster_fast_greedy() is the current name of the deprecated
# fastgreedy.community().
abc <- cluster_fast_greedy(g2)
# Number of characters in each detected community
sizes(abc)
## Community sizes
## 1 2 3
## 11 12 9
# Community id assigned to each character
membership(abc)
## ANAKIN BAIL ORGANA BERU BOBA FETT
## 2 3 2 1
## C-3PO CAPTAIN TYPHO CLIEGG COUNT DOOKU
## 2 1 2 1
## EMPEROR JANGO FETT JAR JAR JOBAL
## 3 1 3 2
## KI-ADI-MUNDI LAMA SU MACE WINDU NUTE GUNRAY
## 3 1 3 1
## OBI-WAN ORN FREE TAA OWEN PADME
## 1 3 2 2
## PK-4 PLO KOON POGGLE RUWEE
## 1 3 1 2
## SENATOR ASK AAK SHMI SIO BIBBLE SOLA
## 3 2 2 2
## SUN RIT TAUN WE WATTO YODA
## 1 1 2 3
# One fill colour per community, shared by the static plot, its legend and the
# 3D view. Without an explicit `col`, plot() colours the vertices from igraph's
# default palette, so the legend would describe colours that are not drawn.
# Only three colours are defined; any further community would get NA.
community_cols <- c("yellow", "red", "blue")
mem <- membership(abc)
# length() of a communities object is the number of communities.
n_comm <- length(abc)
plot(
  abc, g2,
  col = community_cols[mem],
  main = 'Community detection: fastgreedy',
  vertex.shape = "sphere"
)
legend(
  x = -1.5, y = -1.1,
  legend = paste("Community", seq_len(n_comm)),
  pch = 21,
  pt.bg = community_cols[seq_len(n_comm)],
  pt.cex = 2, cex = .8, ncol = 2
)
# Overwrite the `color` vertex attribute with the community colour. It is also
# passed explicitly so the 3D colouring does not depend on graphjs() reading
# the attribute implicitly.
gc <- set_vertex_attr(g2, "color", value = community_cols[mem])
graphjs(
  gc,
  vertex.label = V(gc)$all_info,
  vertex.size = 3,
  vertex.label.cex = 0.4,
  vertex.color = V(gc)$color,
  edge.curved = 0.2,
  rescale = FALSE,
  brush = TRUE,
  main = 'Community detection: fastgreedy',
  vertex.shape = "sphere"
)
# Characters directly connected to ANAKIN in the Episode 1 network
neigh <- neighbors(graph = g1, v = "ANAKIN", mode = "all")
neigh
## + 22/37 vertices, named, from 56c20f4:
## [1] BOSS NASS BRAVO THREE BRAVO TWO C-3PO
## [5] CAPTAIN PANAKA GREEDO JABBA JAR JAR
## [9] JIRA KI-ADI-MUNDI KITSTER MACE WINDU
## [13] OBI-WAN PADME QUI-GON RABE
## [17] RIC OLIE SEBULBA SHMI WALD
## [21] WATTO YODA
# Degree of every character in the Episode 1 network (all incident edges)
deg <- igraph::degree(graph = g1, mode = "all")
deg
## ANAKIN BAIL ORGANA BOSS NASS BRAVO THREE BRAVO TWO
## 22 2 6 3 3
## C-3PO CAPTAIN PANAKA DARTH MAUL DOFINE EMPEROR
## 5 9 4 3 11
## FODE/BEED GENERAL CEEL GREEDO JABBA JAR JAR
## 3 4 3 7 18
## JIRA KI-ADI-MUNDI KITSTER MACE WINDU NUTE GUNRAY
## 3 6 8 6 10
## OBI-WAN PADME PK-4 QUI-GON RABE
## 13 17 1 25 5
## RIC OLIE RUNE SEBULBA SHMI SIO BIBBLE
## 7 4 6 8 7
## TARPALS TC-14 TEY HOW VALORUM WALD
## 1 5 3 4 5
## WATTO YODA
## 5 6
# Character with the highest degree, printed with its position in `deg`
which.max(x = deg)
## QUI-GON
## 24
# Normalised betweenness centrality, treating the network as undirected
igraph::betweenness(graph = g1, normalized = TRUE, directed = FALSE)
## ANAKIN BAIL ORGANA BOSS NASS BRAVO THREE BRAVO TWO
## 0.1665621636 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## C-3PO CAPTAIN PANAKA DARTH MAUL DOFINE EMPEROR
## 0.0000000000 0.0097659131 0.0005291005 0.0019047619 0.1029209327
## FODE/BEED GENERAL CEEL GREEDO JABBA JAR JAR
## 0.0000000000 0.0065154951 0.0000000000 0.0056122449 0.1517886919
## JIRA KI-ADI-MUNDI KITSTER MACE WINDU NUTE GUNRAY
## 0.0000000000 0.0003174603 0.0046296296 0.0003174603 0.1279692618
## OBI-WAN PADME PK-4 QUI-GON RABE
## 0.0465898211 0.0969634554 0.0000000000 0.3191570467 0.0000000000
## RIC OLIE RUNE SEBULBA SHMI SIO BIBBLE
## 0.0253633844 0.0037566138 0.0000000000 0.0015873016 0.0045250693
## TARPALS TC-14 TEY HOW VALORUM WALD
## 0.0000000000 0.0752519526 0.0007936508 0.0161581804 0.0021315193
## WATTO YODA
## 0.0000000000 0.0003174603
# Eigenvector centrality scores (the `vector` element of the result)
eigen_centrality(graph = g1)[["vector"]]
## ANAKIN BAIL ORGANA BOSS NASS BRAVO THREE BRAVO TWO
## 0.88841278 0.05998381 0.44151010 0.13344499 0.13344499
## C-3PO CAPTAIN PANAKA DARTH MAUL DOFINE EMPEROR
## 0.32842084 0.54840868 0.09070399 0.05099477 0.42118426
## FODE/BEED GENERAL CEEL GREEDO JABBA JAR JAR
## 0.19029645 0.15168994 0.20814678 0.43352232 0.78821134
## JIRA KI-ADI-MUNDI KITSTER MACE WINDU NUTE GUNRAY
## 0.25323156 0.32786341 0.46806819 0.32786341 0.30677012
## OBI-WAN PADME PK-4 QUI-GON RABE
## 0.65783717 0.79409949 0.01812361 1.00000000 0.27111965
## RIC OLIE RUNE SEBULBA SHMI SIO BIBBLE
## 0.39174105 0.08119388 0.41710772 0.51422613 0.42636269
## TARPALS TC-14 TEY HOW VALORUM WALD
## 0.07440786 0.19198552 0.04143810 0.21423145 0.31651101
## WATTO YODA
## 0.32842084 0.32786341
Graph density = number of edges / total number of possible edges.
# Density of the Episode 1 network
edge_density(graph = g1)
## [1] 0.1936937
Mean distance - The mean path length is a concept in network topology, defined as the average number of steps along the shortest paths over all possible pairs of network nodes. It measures how efficiently information or mass is transported across a network.
Clique - A clique, C, in an undirected graph G = (V, E) is a subset of the vertices, C ⊆ V, such that every two distinct vertices in C are adjacent. This is equivalent to the condition that the subgraph of G induced by C is a complete graph.
# Mean shortest-path length over the Episode 1 network, ignoring direction
mean_distance(graph = g1, directed = FALSE)
## [1] 2.108108
# Largest cliques of the Episode 1 network
largest_cliques(graph = g1)
## [[1]]
## + 7/37 vertices, named, from 56c20f4:
## [1] QUI-GON ANAKIN PADME JAR JAR SHMI JABBA SEBULBA
##
## [[2]]
## + 7/37 vertices, named, from 56c20f4:
## [1] QUI-GON ANAKIN PADME JAR JAR
## [5] CAPTAIN PANAKA BOSS NASS OBI-WAN
# 3D view of the complete network for episodes 1 to 7. Characters with more
# than 100 appearances are drawn in red, all others in green.
color_gAll <- ifelse(V(gAll)$appearance > 100, "red", "green")
graphjs(
  gAll,
  layout = layout_on_sphere,
  main = "Star Wars Episode 1 to 7",
  vertex.size = dataVerticesAll$appearance / 20,
  vertex.color = color_gAll,
  vertex.shape = "sphere",
  vertex.label = dataVerticesAll$all_info,
  edge.color = "gold",
  rescale = TRUE,
  brush = TRUE
)
# Characters with more than 30 conversations across episodes 1 to 7,
# ordered by conversation count (highest first).
df <- dataVerticesAll[order(-dataVerticesAll$total_edges), ]
df <- df[df$total_edges > 30, ]

# Names of the selected characters, used to pick their conversations.
nam <- df$source

# Edges that start at, or end at, a selected character.
samp1 <- dataEdgeAll[dataEdgeAll$source %in% nam, ]
samp2 <- dataEdgeAll[dataEdgeAll$target %in% nam, ]

# Combine both sets, drop duplicate rows, then keep only edges whose two
# endpoints are both selected characters.
total_rec <- unique(rbind(samp1, samp2))
total_rec <- total_rec[total_rec$source %in% nam & total_rec$target %in% nam, ]

# Graph of the selected characters and its interactive 3D view.
g30 <- graph_from_data_frame(total_rec, directed = FALSE, vertices = df)
graphjs(
  g30,
  layout = layout_on_sphere,
  main = "StarWars Episodes 1 to 7",
  vertex.size = 0.1 * V(g30)$appearance / 2,
  vertex.color = V(g30)$color,
  vertex.shape = "sphere",
  vertex.label = V(g30)$all_info,
  edge.color = "gold",
  brush = TRUE
)
NOOOOOOOOOOOOOOOOOOO