# Install Packages

# Install the packages this script attaches, but only those that are not
# already available. Checking first avoids re-downloading (and possibly
# upgrading) packages every time the script is run.
required_pkgs <- c("tidyverse", "tidytext", "ggwordcloud")
is_installed <- vapply(
  required_pkgs,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Load Packages

library(tidyverse)
library(tidytext)

# Load data

# Read the whole first volume as a single string and store it in a tibble
# alongside a short label identifying the book.
f <- tibble(
  book = "Fellowship",
  text = read_file("./data/01 - The Fellowship Of The Ring.txt")
)

# Tokenize text

# Break the `text` column into individual tokens, one per row, stored in a
# new `word` column.
fotr <- unnest_tokens(f, output = word, input = text)

# Get Word Counts

# Number of occurrences of each distinct word in the first book.
wc <- count(fotr, word)

# The 15 most frequent words (rows tied at the cut-off are all kept).
top_15 <- top_n(wc, 15, n)

# First attempt: bars follow the default (alphabetical) ordering of `word`.
ggplot(data = top_15, mapping = aes(x = word, y = n)) +
  geom_col()

# Second attempt: order the bars by their count instead.
ggplot(data = top_15, mapping = aes(x = reorder(word, n), y = n)) +
  geom_col()

# Remove stopwords

# Drop every token that appears in tidytext's `stop_words` table.
# The join key is named explicitly so dplyr does not have to infer it (and
# emit a "Joining with ..." message), and so the join cannot silently start
# matching on an additional column if the two tables ever share another name.
fotr_tidy <- fotr %>%
  anti_join(stop_words, by = "word")

# Activity 1


# Recount word frequencies, this time on the stop-word-free tokens.
wc <- count(fotr_tidy, word)

# The 15 most frequent remaining words (ties at the cut-off are kept).
top_15 <- top_n(wc, 15, n)

# Bars in the default (alphabetical) ordering of `word`.
ggplot(data = top_15, mapping = aes(x = word, y = n)) +
  geom_col()

# Bars ordered by count.
ggplot(data = top_15, mapping = aes(x = reorder(word, n), y = n)) +
  geom_col()

# Activity 2


# Second volume: read the raw text, label it, then tokenise into words.
f <- tibble(
  book = "Towers",
  text = read_file("./data/02 - The Two Towers.txt")
)
tt <- unnest_tokens(f, output = word, input = text)

# Third volume: same steps. `f` is reused as the scratch holder for the
# raw text, so after this point it contains the third book only.
f <- tibble(
  book = "King",
  text = read_file("./data/03 - The Return Of The King.txt")
)
rotk <- unnest_tokens(f, output = word, input = text)

# Combine all texts

# Stack the three tokenised books into one long table; the `book` column
# records which volume each word came from.
# NOTE(review): the name `df` shadows stats::df(); it is kept because later
# sections of this script refer to it.
df <- bind_rows(list(fotr, tt, rotk))

# Get word counts per book

# Word frequencies for each book, with stop words removed.
# - The anti-join key is spelled out so dplyr neither guesses it nor prints a
#   "Joining with ..." message.
# - count() returns an ungrouped table, so no leftover grouping leaks into
#   later steps; anything that needs per-book behaviour groups explicitly.
wc <- df %>%
  anti_join(stop_words, by = "word") %>%
  count(book, word)

# Activity 3


# Pick the 15 most frequent words separately within each book.
by_book <- group_by(wc, book)
top_15_book <- top_n(by_book, 15, n)

# One horizontal bar chart per book. reorder() computes a single ordering
# across all books here, not one ordering per facet.
ggplot(data = top_15_book, mapping = aes(x = reorder(word, n), y = n)) +
  geom_col() +
  facet_wrap(~book, scales = "free_y") +
  coord_flip()

# Fixing up the plot a bit

# Shared base for both plots: bars ordered by count *within* each book.
# reorder_within() builds a per-book ordering, and scale_x_reordered()
# cleans up the axis labels it produces so plain words are shown.
ordered_bars <- ggplot(
  top_15_book,
  aes(x = reorder_within(word, n, within = book), y = n)
) +
  geom_col() +
  coord_flip() +
  scale_x_reordered()

# Facets in the default order of `book`.
ordered_bars +
  facet_wrap(~book, scales = "free_y")

# Facets in publication order, set through explicit factor levels.
ordered_bars +
  facet_wrap(
    ~factor(book, levels = c("Fellowship", "Towers", "King")),
    scales = "free_y"
  )

# Creating a Word Cloud

library(ggwordcloud)
# Every cloud in this section labels points with the word itself.
cloud_base <- ggplot(top_15_book, aes(label = word))

# Plain cloud: uniform size and colour.
cloud_base +
  geom_text_wordcloud()

# Colour by book and scale text by frequency.
book_cloud <- cloud_base +
  geom_text_wordcloud(aes(color = book, size = n))
book_cloud

# Same mapping, split into one panel per book.
book_cloud +
  facet_wrap(~book)

# Activity 4


# The 100 most frequent non-stop-words across all three books combined.
# Stop words are removed *before* counting, so no grouping is built for
# words that are about to be discarded, and the join key is explicit so
# dplyr does not have to infer it or print a "Joining with ..." message.
wc_all <- df %>%
  anti_join(stop_words, by = "word") %>%
  count(word) %>%
  top_n(100, n)

# Plain cloud: every word drawn at the same size.
ggplot(wc_all, aes(label = word)) +
  geom_text_wordcloud()

# Size and colour both encode frequency; scale_size_area() caps the largest
# word at size 12.
ggplot(wc_all, aes(label = word, size = n, color = n)) +
  geom_text_wordcloud() +
  scale_size_area(max_size = 12)

# Using different shapes

# Rebuild the table of the 100 most frequent non-stop-words across all books.
# Stop words are filtered out before counting and the join key is explicit,
# which avoids grouping rows that are then dropped and silences dplyr's
# "Joining with ..." message.
wc_all <- df %>%
  anti_join(stop_words, by = "word") %>%
  count(word) %>%
  top_n(100, n)

# Everything the three clouds have in common: the aesthetic mapping and the
# size scale (largest word capped at size 7). Only the outline shape differs.
shape_cloud <- ggplot(wc_all, aes(label = word, size = n, color = n)) +
  scale_size_area(max_size = 7)

shape_cloud +
  geom_text_wordcloud(shape = "star", grid_margin = 0.5)

shape_cloud +
  geom_text_wordcloud(shape = "pentagon", grid_margin = 0.5)

shape_cloud +
  geom_text_wordcloud(shape = "diamond", grid_margin = 0.5)