The following code and plot show how often the names Ron and Hermione occur in the Harry Potter book series. The chapters are grouped by book; the x axis is the approximate chapter (approximate because the scale of the x axis is set per book, and the names Ron and Hermione do not appear in the first book until the 6th chapter), and the y axis is the count of each name. The legend shows the color and shape used for each name. I chose hollow circles to better show the differences in the word counts.
This plot shows only the word counts. Further analysis would be needed to compare the interaction between Ron and Harry with that between Hermione and Harry, or to see what each character is doing or saying, or whether they are merely being mentioned or present in a scene.
library(tidyverse)
## -- Attaching packages ----------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## -- Conflicts -------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidytext)
## Warning: package 'tidytext' was built under R version 3.6.3
# devtools::install_github("bradleyboehmke/harrypotter")
library(harrypotter)
# Build a one-row-per-chapter tibble for a single book.
#   order    - position of the book in the series (used later to sort facets)
#   title    - display title of the book
#   chapters - character vector with one element of text per chapter
book_chapters_tibble <- function(order, title, chapters) {
  tibble(
    b_o = order,
    book = title,
    # seq_along() stays correct (empty) when a book has no chapters,
    # whereas 1:length(x) would yield c(1, 0).
    chapter = seq_along(chapters),
    text = chapters
  )
}

sorc_stone <- book_chapters_tibble(1, "Sorcerer's Stone", philosophers_stone)
chmbr_sec <- book_chapters_tibble(2, "Chamber of Secrets", chamber_of_secrets)
# Title spelling corrected ("Azkaban"); it is shown as a facet label.
pris_azk <- book_chapters_tibble(3, "Prisoner of Azkaban", prisoner_of_azkaban)
gob_fire <- book_chapters_tibble(4, "Goblet of Fire", goblet_of_fire)
ord_phnx <- book_chapters_tibble(5, "Order of the Phoenix", order_of_the_phoenix)
hlf_bld_prnc <- book_chapters_tibble(6, "Half Blood Prince", half_blood_prince)
dtly_hlws <- book_chapters_tibble(7, "Deathly Hallows", deathly_hallows)
# Stack the seven per-book tibbles in series order and number every chapter
# consecutively across the whole series.
hp_df <- bind_rows(
  sorc_stone,
  chmbr_sec,
  pris_azk,
  gob_fire,
  ord_phnx,
  hlf_bld_prnc,
  dtly_hlws
) %>%
  # row_number() is safe for an empty table, unlike 1:nrow(hp_df).
  mutate(series_chapter = row_number())
# Tokenise the chapter text into one row per word; the remaining columns of
# hp_df are repeated on every word row.
# No group_by() is applied first: code that consumes this table sets the
# grouping it needs itself.
# NOTE(review): recent tidytext releases appear to treat grouped input as a
# request to collapse text per group, which can drop the ungrouped columns
# (b_o, book, chapter) -- confirm against the installed version.
hp_words_df <- hp_df %>%
  unnest_tokens(word, text)
#' Count how often a name occurs in each chapter
#'
#' @param name Token to match exactly against the `word` column. Tokens
#'   produced by `unnest_tokens()` are lower-cased by default, so pass
#'   e.g. "ron".
#' @param words_df One-row-per-word data frame with columns `b_o`, `book`,
#'   `chapter` and `word`. Defaults to the script-level `hp_words_df`.
#' @return One row per (b_o, book, chapter) in which `name` occurs, with the
#'   matched `word` and its count `n`. Chapters without the name are absent.
#'   The result remains grouped by `b_o`, `book` and `chapter`.
count_name_of_character_in_chapter <- function(name, words_df = hp_words_df) {
  # Read from the `words_df` argument rather than the global table, so a
  # caller-supplied data frame is actually used.
  words_df %>%
    group_by(b_o, book, chapter) %>%
    filter(word == name) %>%
    count(word)
}
# Per-chapter counts for each of the two names.
ron_by_chapter <- count_name_of_character_in_chapter("ron")
herminone_by_chapter <- count_name_of_character_in_chapter("hermione")

# Put both counts side by side, one row per chapter. The inner join keeps only
# chapters in which BOTH names occur; chapters mentioning just one of them are
# excluded. Count columns are suffixed n.x (Ron) and n.y (Hermione).
char_df <- ron_by_chapter %>%
  inner_join(herminone_by_chapter, by = c("b_o", "book", "chapter")) %>%
  # Replace the lower-case tokens with display names for the plot legend.
  mutate(word.x = "Ron", word.y = "Hermione")
# Scatter plot of name counts per chapter, one facet per book.
# Each name gets its own layer of hollow circles (shape 21), so overlapping
# counts for the two names stay distinguishable.
ggplot(char_df, aes(x = chapter)) +
  geom_point(aes(y = n.x, color = word.x), shape = 21) +
  geom_point(aes(y = n.y, color = word.y), shape = 21) +
  labs(
    title = "Ron vs Hermione Word Count\nby Book, by Chapter",
    x = "Chapter",
    y = "Word Count",
    color = "Name"
  ) +
  # Facets are ordered by series position (b_o); each book gets its own
  # x scale because chapter counts differ between books.
  facet_wrap(~ reorder(book, b_o), scales = "free_x") +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))