Sigur Ros- The Reynes of Castamere
Following on from yesterday’s post, I wanted to create some static graphs of the A Song of Ice and Fire books that were easier on my PC, using the network and ggnet packages
library(network)
library(ggnet)
library(ggplot2)
I’m using the same data I scraped in the previous post but instead of going through all that again, I’ll load it from my GitHub. The only change I think I’ve made is to add the links from the Characters df sheet to the Nodes df to tidy the environment a bit
# Base location of the pre-scraped data on GitHub
Github <- "https://raw.githubusercontent.com/RobWHickman/"
Project <- "Databases-and-Files/master/GoT-Network/"

# Build the four CSV URLs in one vectorised call, then unpack them into the
# individual *_file variables
csv_urls <- paste0(Github, Project,
                   c("GoT-Matrix.csv", "GoT-Network.csv",
                     "GoT-Nodes.csv", "GoT-Books.csv"))
Matrix_file <- csv_urls[1]
Network_file <- csv_urls[2]
Nodes_file <- csv_urls[3]
Books_file <- csv_urls[4]

# Read each file, keeping text columns as plain character vectors.
# The first column of the chapter matrix is renamed to "name".
chapter_df <- read.csv(file = Matrix_file, stringsAsFactors = FALSE)
colnames(chapter_df)[1] <- "name"
Links <- read.csv(file = Network_file, stringsAsFactors = FALSE)
Nodes <- read.csv(file = Nodes_file, stringsAsFactors = FALSE)
books <- read.csv(file = Books_file, stringsAsFactors = FALSE)
I also wanted to colour each network by the book cover. This basically copies the Sigil colouring function from the previous post. I also lightened the colours for the panel area of the main plot so the nodes/vertices could still be seen
library(jpeg)
library(RImagePalette)
# Get the colour of a book cover.
#
# book_cover: URL of a JPEG image of the cover.
# Returns the result of image_palette(img, n = 1) for the downloaded image
# (presumably a single colour string - confirm against RImagePalette).
cover_func <- function(book_cover) {
  # Download to a temporary file rather than "cover.jpg" in the working
  # directory, so no existing file is overwritten and nothing is left behind;
  # on.exit() removes it even if the download or the decode fails
  cover_path <- tempfile(fileext = ".jpg")
  on.exit(unlink(cover_path), add = TRUE)

  download.file(book_cover, cover_path, mode = "wb", quiet = TRUE)
  img <- readJPEG(cover_path)
  image_palette(img, n = 1)
}
# One colour per book, taken from its cover image
books$cover_colours <- unlist(lapply(books$cover, cover_func))

#make the colours a bit lighter so they don't interfere with the plot
# Each light colour is step 75 of a 100-step ramp from the cover colour to
# white. vapply() builds the whole vector in one go (and returns an empty
# character vector when there are no books) instead of growing it in a loop.
light_covers <- vapply(
  books$cover_colours,
  function(cover_colour) {
    colorRampPalette(c(cover_colour, "white"))(100)[75]
  },
  character(1),
  USE.NAMES = FALSE
)
books$light_covers <- light_covers
I also scraped the point-of-view of the chapter (i.e. the character who is the main focus of each chapter) using the chapter_urls from the previous post and running them in this function. I’ve commented it out and added the download of the file from GitHub below to save scraping on re-running my own script
# Scraper for the point-of-view character of a chapter page. It is kept for
# reference only and left commented out; the saved result is read from
# GitHub below instead.
#pov_func <- function(chapter_url){
# read <- read_html(chapter_url)
# nodes <- read %>% html_nodes("#content > div > div > div > div > ul > li:nth-child(1) > a")
# pov <- nodes %>% html_text()
# return(pov)
#}
#povs <- lapply(chapter_urls, pov_func)
#chapters <- data.frame(pov = unlist(povs), book = rep(books$book, books$chapters))

# Load the previously scraped chapters table
Chapters_file <- paste0(Github, Project, "GoT-Chapters.csv")
chapters <- read.csv(file = Chapters_file, stringsAsFactors = FALSE)
To plot each book separately (my computer can’t handle running network() on the 22k edges between all the characters in every book) I’ll use this massive loop which will spit out a lattice of all 5 network plots at the end. Hopefully it should be commented enough that it won’t get too confusing
#get the chapter numbers cumulatively for each book
books$cumsum <- cumsum(books$chapters)

#sort the nodes once; the ordering does not depend on the book. The name
#lookup and the size assignment below rely on Nodes and chapter_df sharing
#this row order (assumed - confirm against the two CSV files)
Nodes <- Nodes[order(Nodes$order), ]

#set up colours for each region (identical for every book)
y <- c(Dorne. = "Orange", Vale. = "Skyblue", Westerlands. = "Red",
       Stormlands. = "Yellow", Riverlands. = "Darkblue",
       Crownlands. = "Goldenrod", North. = "White", Islands. = "Grey30",
       Reach. = "Darkgreen", none = "Grey70", Westeros. = "Purple")

#build one network plot per book; each plot is stored as book1 ... book5
for (book in 1:5) {
  #make the chapter_df for just chapters in 1 book + the name column
  #books$cumsum[book - 1] is empty for book 1, so sum() turns it into 0
  lower <- sum(books$cumsum[book - 1]) + 2
  upper <- sum(books$cumsum[book])
  #NOTE(review): lower:upper selects one column fewer than the
  #(lower - 1):upper rows taken from `chapters` below, so the last chapter of
  #each book looks to be left out of book_df - confirm against the layout of
  #GoT-Matrix.csv before changing
  book_df <- chapter_df[c(1, lower:upper)]
  #swap the link-style names for the display names held in Nodes
  book_df$name <- Nodes$name[which(Nodes$links %in% book_df$name)]

  #the point-of-view rows for the same book
  pov_chapters <- chapters[c((lower - 1):upper), ]

  #remove characters that don't appear in that book
  #selecting the rows to keep also works when nobody needs removing
  chapter_cols <- 2:ncol(book_df)
  appearances <- rowSums(book_df[chapter_cols])
  book_df <- book_df[which(appearances >= 1), ]

  #prepare a matrix of interactions between each character
  #an n(characters) x n(characters) matrix where the value is the number of
  #chapters in which character[row] and character[column] are both marked 1
  present <- as.matrix(book_df[chapter_cols]) == 1
  present[is.na(present)] <- FALSE
  Book_Matrix <- tcrossprod(present * 1)
  #a character has no connection with themselves
  diag(Book_Matrix) <- 0
  dimnames(Book_Matrix) <- list(book_df$name, book_df$name)

  #make network object of the matrix using network()
  #pass a real matrix and state its type so it is read as an adjacency matrix
  net <- network(Book_Matrix, matrix.type = "adjacency")

  #prepare the nodes df by deleting unused nodes
  #i.e. the characters that don't appear in this book
  NodesBook <- Nodes[which(Nodes$name %in% book_df$name), ]
  #add rownames so we can find each node from the network object
  rownames(NodesBook) <- NodesBook$order
  #Also change the size variable to be the number of chapters each
  #character appears in in just this one book
  NodesBook$size <- rowSums(book_df[chapter_cols])

  #bind aesthetics to the network object
  net %v% "region" <- as.character(NodesBook$region)
  net %v% "colour" <- as.character(NodesBook$colour)
  net %v% "size" <- (as.numeric(NodesBook$size) ^ 0.25) * 2.5
  net %v% "nodesize" <- log(as.numeric(NodesBook$size)) * 3

  #set up which labels to show
  #(i.e. point-of-view characters, plus "recurring" characters who are in
  #more than 10 chapters of this book)
  povs <- unique(pov_chapters$pov)
  extralabels <- book_df$name[which(rowSums(book_df[chapter_cols]) > 10)]
  labels <- c(povs, extralabels)

  #plot the ggnet
  book_plot <- ggnet2(net, label = labels, node.color = "region",
                      palette = y, label.size = "size",
                      node.size = "nodesize",
                      alpha = 0.75, edge.color = "grey75") +
    ggtitle(paste0("Book ", book, ": ", books$title[book])) +
    theme(legend.position = "none",
          title = element_text(size = 20),
          plot.background = element_rect(fill = books$cover_colours[book]),
          panel.background = element_rect(fill = books$light_covers[book]))

  #assign the plot to the book number
  assign(paste0("book", book), book_plot)

#close up the loop per book
}
Then make the final lattice plot of all five books together using grid.arrange()
library(gridExtra)
#collect the five per-book plots and draw them together
book_plots <- list(book1, book2, book3, book4, book5)
do.call(grid.arrange, book_plots)
---
title: "A Plot of Networks (part 2)"
output: html_notebook
---
[Sigur Ros- The Reynes of Castamere](https://www.youtube.com/watch?v=w3QW8PVyyNM)

Following on from yesterday's post, I wanted to create some static graphs of the A Song of Ice and Fire books that were easier on my PC, using the [network](https://cran.r-project.org/web/packages/network/index.html) and [ggnet](https://cran.r-project.org/web/packages/GGally/index.html) packages

```{r, message=FALSE, warning=FALSE}
library(network)
library(ggnet)
library(ggplot2)
```

I'm using the same data I scraped in the previous post but instead of going through all that again, I'll load it from my GitHub. The only change I think I've made is to add the links from the Characters df sheet to the Nodes df to tidy the environment a bit

```{r, message=FALSE, warning=FALSE}
# Base location of the pre-scraped data on GitHub
Github <- "https://raw.githubusercontent.com/RobWHickman/"
Project <- "Databases-and-Files/master/GoT-Network/"

# Build the four CSV URLs in one vectorised call, then unpack them into the
# individual *_file variables
csv_urls <- paste0(Github, Project,
                   c("GoT-Matrix.csv", "GoT-Network.csv",
                     "GoT-Nodes.csv", "GoT-Books.csv"))
Matrix_file <- csv_urls[1]
Network_file <- csv_urls[2]
Nodes_file <- csv_urls[3]
Books_file <- csv_urls[4]

# Read each file, keeping text columns as plain character vectors.
# The first column of the chapter matrix is renamed to "name".
chapter_df <- read.csv(file = Matrix_file, stringsAsFactors = FALSE)
colnames(chapter_df)[1] <- "name"
Links <- read.csv(file = Network_file, stringsAsFactors = FALSE)
Nodes <- read.csv(file = Nodes_file, stringsAsFactors = FALSE)
books <- read.csv(file = Books_file, stringsAsFactors = FALSE)
```

I also wanted to colour each network by the book cover. This basically copies the Sigil colouring function from the previous post. I also lightened the colours for the panel area of the main plot so the nodes/vertices could still be seen

```{r, message=FALSE,warning=FALSE}
library(jpeg)
library(RImagePalette)

# Get the colour of a book cover.
#
# book_cover: URL of a JPEG image of the cover.
# Returns the result of image_palette(img, n = 1) for the downloaded image
# (presumably a single colour string - confirm against RImagePalette).
cover_func <- function(book_cover) {
  # Download to a temporary file rather than "cover.jpg" in the working
  # directory, so no existing file is overwritten and nothing is left behind;
  # on.exit() removes it even if the download or the decode fails
  cover_path <- tempfile(fileext = ".jpg")
  on.exit(unlink(cover_path), add = TRUE)

  download.file(book_cover, cover_path, mode = "wb", quiet = TRUE)
  img <- readJPEG(cover_path)
  image_palette(img, n = 1)
}

# One colour per book, taken from its cover image
books$cover_colours <- unlist(lapply(books$cover, cover_func))

#make the colours a bit lighter so they don't interfere with the plot
# Each light colour is step 75 of a 100-step ramp from the cover colour to
# white. vapply() builds the whole vector in one go (and returns an empty
# character vector when there are no books) instead of growing it in a loop.
light_covers <- vapply(
  books$cover_colours,
  function(cover_colour) {
    colorRampPalette(c(cover_colour, "white"))(100)[75]
  },
  character(1),
  USE.NAMES = FALSE
)
books$light_covers <- light_covers
```

I also scraped the point-of-view of the chapter (i.e. the character who is the main focus of each chapter) using the chapter_urls from the previous post and running them in this function. I've commented it out and added the download of the file from GitHub below to save scraping on re-running my own script

```{r, message=FALSE, warning=FALSE}
# Scraper for the point-of-view character of a chapter page. It is kept for
# reference only and left commented out; the saved result is read from
# GitHub below instead.
#pov_func <- function(chapter_url){
#	read <- read_html(chapter_url)
#	nodes <- read %>% html_nodes("#content > div > div > div > div > ul > li:nth-child(1) > a")
#	pov <- nodes %>% html_text()
#	return(pov)
#}

#povs <- lapply(chapter_urls, pov_func)

#chapters <- data.frame(pov = unlist(povs), book = rep(books$book, books$chapters))

# Load the previously scraped chapters table
Chapters_file <- paste0(Github, Project, "GoT-Chapters.csv")
chapters <- read.csv(file = Chapters_file, stringsAsFactors = FALSE)
```


To plot each book separately (my computer can't handle running network() on the 22k edges between all the characters in every book) I'll use this massive loop which will spit out a lattice of all 5 network plots at the end. Hopefully it should be commented enough that it won't get too confusing

```{r, message=FALSE, warning=FALSE}
#get the chapter numbers cumulatively for each book
books$cumsum <- cumsum(books$chapters)

#sort the nodes once; the ordering does not depend on the book. The name
#lookup and the size assignment below rely on Nodes and chapter_df sharing
#this row order (assumed - confirm against the two CSV files)
Nodes <- Nodes[order(Nodes$order), ]

#set up colours for each region (identical for every book)
y <- c(Dorne. = "Orange", Vale. = "Skyblue", Westerlands. = "Red",
       Stormlands. = "Yellow", Riverlands. = "Darkblue",
       Crownlands. = "Goldenrod", North. = "White", Islands. = "Grey30",
       Reach. = "Darkgreen", none = "Grey70", Westeros. = "Purple")

#build one network plot per book; each plot is stored as book1 ... book5
for (book in 1:5) {
  #make the chapter_df for just chapters in 1 book + the name column
  #books$cumsum[book - 1] is empty for book 1, so sum() turns it into 0
  lower <- sum(books$cumsum[book - 1]) + 2
  upper <- sum(books$cumsum[book])
  #NOTE(review): lower:upper selects one column fewer than the
  #(lower - 1):upper rows taken from `chapters` below, so the last chapter of
  #each book looks to be left out of book_df - confirm against the layout of
  #GoT-Matrix.csv before changing
  book_df <- chapter_df[c(1, lower:upper)]
  #swap the link-style names for the display names held in Nodes
  book_df$name <- Nodes$name[which(Nodes$links %in% book_df$name)]

  #the point-of-view rows for the same book
  pov_chapters <- chapters[c((lower - 1):upper), ]

  #remove characters that don't appear in that book
  #selecting the rows to keep also works when nobody needs removing
  chapter_cols <- 2:ncol(book_df)
  appearances <- rowSums(book_df[chapter_cols])
  book_df <- book_df[which(appearances >= 1), ]

  #prepare a matrix of interactions between each character
  #an n(characters) x n(characters) matrix where the value is the number of
  #chapters in which character[row] and character[column] are both marked 1
  present <- as.matrix(book_df[chapter_cols]) == 1
  present[is.na(present)] <- FALSE
  Book_Matrix <- tcrossprod(present * 1)
  #a character has no connection with themselves
  diag(Book_Matrix) <- 0
  dimnames(Book_Matrix) <- list(book_df$name, book_df$name)

  #make network object of the matrix using network()
  #pass a real matrix and state its type so it is read as an adjacency matrix
  net <- network(Book_Matrix, matrix.type = "adjacency")

  #prepare the nodes df by deleting unused nodes
  #i.e. the characters that don't appear in this book
  NodesBook <- Nodes[which(Nodes$name %in% book_df$name), ]
  #add rownames so we can find each node from the network object
  rownames(NodesBook) <- NodesBook$order
  #Also change the size variable to be the number of chapters each
  #character appears in in just this one book
  NodesBook$size <- rowSums(book_df[chapter_cols])

  #bind aesthetics to the network object
  net %v% "region" <- as.character(NodesBook$region)
  net %v% "colour" <- as.character(NodesBook$colour)
  net %v% "size" <- (as.numeric(NodesBook$size) ^ 0.25) * 2.5
  net %v% "nodesize" <- log(as.numeric(NodesBook$size)) * 3

  #set up which labels to show
  #(i.e. point-of-view characters, plus "recurring" characters who are in
  #more than 10 chapters of this book)
  povs <- unique(pov_chapters$pov)
  extralabels <- book_df$name[which(rowSums(book_df[chapter_cols]) > 10)]
  labels <- c(povs, extralabels)

  #plot the ggnet
  book_plot <- ggnet2(net, label = labels, node.color = "region",
                      palette = y, label.size = "size",
                      node.size = "nodesize",
                      alpha = 0.75, edge.color = "grey75") +
    ggtitle(paste0("Book ", book, ": ", books$title[book])) +
    theme(legend.position = "none",
          title = element_text(size = 20),
          plot.background = element_rect(fill = books$cover_colours[book]),
          panel.background = element_rect(fill = books$light_covers[book]))

  #assign the plot to the book number
  assign(paste0("book", book), book_plot)

#close up the loop per book
}
```

Then make the final lattice plot of all five books together using grid.arrange()

```{r}
library(gridExtra)

#collect the five per-book plots and draw them together
book_plots <- list(book1, book2, book3, book4, book5)
do.call(grid.arrange, book_plots)
```
