library(knitr)
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(openxlsx)
library(tidytext)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(scales)
library(ggplot2)
library(rlang)
library(wordcloud)
## Loading required package: RColorBrewer
library(forcats)
library(igraph)
##
## Attaching package: 'igraph'
## The following object is masked from 'package:rlang':
##
## is_named
## The following object is masked from 'package:tidyr':
##
## crossing
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(ggraph)
library(widyr)
library(ggrepel)
# Read the review workbook into a data frame.
reviews <- read.xlsx(
  xlsxFile = "C:/Users/viole/Downloads/book_comparison_reviews.xlsx"
)
# Inspect column names, types and a preview of the values.
str(reviews)
## 'data.frame': 33 obs. of 12 variables:
## $ Author : chr "David McCloskey" "David McCloskey" "David McCloskey" "David McCloskey" ...
## $ Title : chr "Moscow X" "Moscow X" "Moscow X" "Moscow X" ...
## $ Series : chr "Standalone" "Standalone" "Standalone" "Standalone" ...
## $ Link : chr "https://www.goodreads.com/book/show/77265038-moscow-x?" "https://www.goodreads.com/book/show/77265038-moscow-x?" "https://www.goodreads.com/book/show/77265038-moscow-x?" "https://www.goodreads.com/book/show/77265038-moscow-x?" ...
## $ Description : chr "A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception." "A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception." "A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception." "A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception." ...
## $ Avg.Stars : chr "4.4" "4.4" "4.4" "4.4" ...
## $ #.Reviews : num 699 699 699 699 699 699 699 699 699 699 ...
## $ Price : num 15 15 15 15 15 ...
## $ Genre : chr "Spy Thriller" "Spy Thriller" "Spy Thriller" "Spy Thriller" ...
## $ Review.Stars: num NA 5 5 5 5 5 3 3 2 2 ...
## $ Headline : chr NA "Authentic spy craft" "Gripping from start" "Great modern espionage" ...
## $ Review.Text : chr NA "This novel pulled me in quickly with its tense atmosphere and well-developed spy narrative. The author did a gr"| __truncated__ "From the opening chapters, this book created a strong sense of suspense that carried through to the end. The st"| __truncated__ "I found this book to be a compelling take on contemporary espionage with plenty of twists and tension. The paci"| __truncated__ ...
# Bigram analysis ----
# Tokenise every review into overlapping two-word sequences; each row of
# `bigrams` carries one bigram alongside the remaining review columns.
bigrams <- unnest_tokens(
  tbl = reviews,
  output = bigram,
  input = Review.Text,
  token = "ngrams",
  n = 2
)
# Show the raw bigrams that occur more than ten times, most frequent first.
bigrams %>%
  count(bigram, sort = TRUE) %>%
  filter(n > 10)
## bigram n
## 1 did not 16
## 2 this book 15
## 3 the story 14
## 4 which made 12
## 5 a few 11
## 6 the characters 11
## 7 the plot 11
# Custom stop words ----
# One-column data frame (`word`) of project-specific terms that are removed in
# addition to the standard stop-word list.
custom <- data.frame(
  word = c(
    # Generic review vocabulary
    c("book", "books", "story", "stories", "read", "feel", "written",
      "characters", "character", "plot", "elements"),
    # Words tied to the specific books
    c("moscow", "x", "peacock", "sparrow", "spy", "alone"),
    # Author names
    # NOTE(review): "is" is grouped with the author names in the original
    # list; presumably intentional -- confirm.
    c("mccloskey", "david", "berry", "is", "beaumont", "charles"),
    # Optional genre words (drop this group to keep them in the results)
    c("suspense", "espionage")
  )
)
# Split each bigram into its two component words, one column per word.
bigrams_separated <- bigrams %>%
  separate(bigram, c("word1", "word2"), sep = " ")
# Keep only complete pairs in which neither word is a standard or custom stop
# word. Reviews with missing text yield NA bigrams; dropping them here keeps a
# spurious "NA NA" pair out of the counts.
bigrams_filtered <- bigrams_separated %>%
  filter(!is.na(word1), !is.na(word2)) %>%
  filter(!word1 %in% stop_words$word, !word2 %in% stop_words$word) %>%
  filter(!word1 %in% custom$word, !word2 %in% custom$word)
# New bigram counts, most frequent first.
bigram_counts <- bigrams_filtered %>%
  count(word1, word2, sort = TRUE)
# Re-join the two words for display and show pairs seen more than twice.
bigram_counts %>%
  unite(bigram, word1, word2, sep = " ") %>%
  filter(n > 2)
## bigram n
## 1 added realism 3
## 2 author explored 3
## 3 balancing action 3
## 4 challenges faced 3
## 5 completely lost 3
## 6 detailed approach 3
## 7 emotional challenges 3
## 8 emotional stakes 3
## 9 enjoy modern 3
## 10 enjoyable thriller 3
## 11 establish mood 3
## 12 helped establish 3
## 13 hidden agendas 3
## 14 intelligence officers 3
## 15 job balancing 3
## 16 main storyline 3
## 17 modern political 3
## 18 narrative slowed 3
## 19 nonstop action 3
## 20 personal conflict 3
## 21 political intrigue 3
## 22 political thrillers 3
## 23 setting influenced 3
## 24 stakes involved 3
## 25 strong sense 3
## 26 successfully combined 3
## 27 tense atmosphere 3
## 28 thoughtful mix 3
## 29 thriller experience 3
## 30 unfolded gradually 3
## 31 writing style 3
# Trigrams Analysis
# Tokenise the reviews into three-word sequences, discard any sequence that
# contains a standard or custom stop word, and list those seen more than once.
reviews %>%
  unnest_tokens(trigram, Review.Text, token = "ngrams", n = 3) %>%
  # Reviews with missing text yield an NA trigram; drop it so it is not
  # counted as a real "NA NA NA" phrase.
  filter(!is.na(trigram)) %>%
  separate(trigram, c("word1", "word2", "word3"), sep = " ") %>%
  filter(!word1 %in% stop_words$word,
         !word2 %in% stop_words$word,
         !word3 %in% stop_words$word) %>%
  filter(!word1 %in% custom$word,
         !word2 %in% custom$word,
         !word3 %in% custom$word) %>%
  count(word1, word2, word3, sort = TRUE) %>%
  unite(trigram, word1, word2, word3, sep = " ") %>%
  filter(n > 1)
## trigram n
## 1 emotional challenges faced 3
## 2 emotional stakes involved 3
## 3 enjoy modern political 3
## 4 enjoyable thriller experience 3
## 5 helped establish mood 3
## 6 job balancing action 3
## 7 modern political thrillers 3
## 8 create meaningful tension 2
## 9 disappointing reading experience 2
## 10 premise sounded promising 2
## 11 structure lacked focus 2
## 12 writing style flat 2
# Bigram Networks
# Visualize relationships using igraph
# Split every bigram into its two words, drop incomplete pairs (reviews with
# missing text produce NA bigrams) and remove pairs containing a stop word.
# The stop-word filter must run BEFORE counting: previously it was applied
# after `bigram_graph` had already been built, so it never affected the graph.
bigrams <- reviews %>%
  unnest_tokens(bigram, Review.Text, token = "ngrams", n = 2) %>%
  separate(bigram, into = c("word1", "word2"), sep = " ") %>%
  filter(!is.na(word1), !is.na(word2)) %>%
  filter(!word1 %in% stop_words$word,
         !word2 %in% stop_words$word)
# Frequency of each remaining (word1, word2) pair, most frequent first.
bigram_counts <- bigrams %>%
  count(word1, word2, sort = TRUE)
# Directed graph with one edge per frequent pair (word1 -> word2); `n` is kept
# as an edge attribute.
# NOTE(review): the cutoff is lowered from `n > 5` to `n > 2`, the same cutoff
# used for the filtered bigram table, because stop-word-free pairs are far
# rarer than raw pairs in this small data set -- confirm the desired density.
bigram_graph <- bigram_counts %>%
  filter(n > 2) %>%
  graph_from_data_frame()
# Fix the RNG seed before plotting so the figure is reproducible.
set.seed(2017)
# Plain network view of `bigram_graph` using the "kk" layout: faint grey
# edges, blue nodes, and repelled word labels on an empty background.
ggraph(graph = bigram_graph, layout = "kk") +
  geom_edge_link(color = "gray60", alpha = 0.2) +
  geom_node_point(color = "steelblue", size = 3) +
  geom_node_text(aes(label = name), size = 3, repel = TRUE) +
  theme_void()

# Fix the RNG seed before plotting so the figure is reproducible.
set.seed(2020)
# Closed arrow head used to show the direction of each edge.
a <- grid::arrow(length = unit(0.15, "inches"), type = "closed")
# Directed view of `bigram_graph` using the "fr" layout: edge opacity is
# mapped to the pair count `n`, and each arrow stops short of its target node.
ggraph(graph = bigram_graph, layout = "fr") +
  geom_edge_link(
    aes(edge_alpha = n),
    arrow = a,
    end_cap = circle(0.07, "inches"),
    show.legend = FALSE
  ) +
  geom_node_point(size = 5, color = "lightblue") +
  geom_node_text(aes(label = name), hjust = 1, vjust = 1) +
  theme_void()

# Correlations
# Give every row a sequential integer id so each review can serve as the
# co-occurrence unit below. `seq_len()` is used instead of `1:nrow()` so an
# empty data frame yields a zero-length id rather than c(1, 0).
reviews$id <- seq_len(nrow(reviews))
str(reviews)
## 'data.frame': 33 obs. of 13 variables:
## $ Author : chr "David McCloskey" "David McCloskey" "David McCloskey" "David McCloskey" ...
## $ Title : chr "Moscow X" "Moscow X" "Moscow X" "Moscow X" ...
## $ Series : chr "Standalone" "Standalone" "Standalone" "Standalone" ...
## $ Link : chr "https://www.goodreads.com/book/show/77265038-moscow-x?" "https://www.goodreads.com/book/show/77265038-moscow-x?" "https://www.goodreads.com/book/show/77265038-moscow-x?" "https://www.goodreads.com/book/show/77265038-moscow-x?" ...
## $ Description : chr "A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception." "A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception." "A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception." "A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception." ...
## $ Avg.Stars : chr "4.4" "4.4" "4.4" "4.4" ...
## $ #.Reviews : num 699 699 699 699 699 699 699 699 699 699 ...
## $ Price : num 15 15 15 15 15 ...
## $ Genre : chr "Spy Thriller" "Spy Thriller" "Spy Thriller" "Spy Thriller" ...
## $ Review.Stars: num NA 5 5 5 5 5 3 3 2 2 ...
## $ Headline : chr NA "Authentic spy craft" "Gripping from start" "Great modern espionage" ...
## $ Review.Text : chr NA "This novel pulled me in quickly with its tense atmosphere and well-developed spy narrative. The author did a gr"| __truncated__ "From the opening chapters, this book created a strong sense of suspense that carried through to the end. The st"| __truncated__ "I found this book to be a compelling take on contemporary espionage with plenty of twists and tension. The paci"| __truncated__ ...
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
# One row per (review, word) after removing standard and custom stop words.
words <- reviews %>%
  unnest_tokens(word, Review.Text) %>%
  # Rows without review text would otherwise contribute an NA token that is
  # carried into the pairwise statistics below.
  filter(!is.na(word)) %>%
  # Join keys are spelled out so the match column does not depend on which
  # column names the tables happen to share.
  anti_join(stop_words, by = "word") %>%
  anti_join(custom, by = "word")
# Preview the first ten source rows, including the new `id` column.
head(reviews, 10)
## Author Title Series
## 1 David McCloskey Moscow X Standalone
## 2 David McCloskey Moscow X Standalone
## 3 David McCloskey Moscow X Standalone
## 4 David McCloskey Moscow X Standalone
## 5 David McCloskey Moscow X Standalone
## 6 David McCloskey Moscow X Standalone
## 7 David McCloskey Moscow X Standalone
## 8 David McCloskey Moscow X Standalone
## 9 David McCloskey Moscow X Standalone
## 10 David McCloskey Moscow X Standalone
## Link
## 1 https://www.goodreads.com/book/show/77265038-moscow-x?
## 2 https://www.goodreads.com/book/show/77265038-moscow-x?
## 3 https://www.goodreads.com/book/show/77265038-moscow-x?
## 4 https://www.goodreads.com/book/show/77265038-moscow-x?
## 5 https://www.goodreads.com/book/show/77265038-moscow-x?
## 6 https://www.goodreads.com/book/show/77265038-moscow-x?
## 7 https://www.goodreads.com/book/show/77265038-moscow-x?
## 8 https://www.goodreads.com/book/show/77265038-moscow-x?
## 9 https://www.goodreads.com/book/show/77265038-moscow-x?
## 10 https://www.goodreads.com/book/show/77265038-moscow-x?
## Description
## 1 A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception.
## 2 A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception.
## 3 A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception.
## 4 A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception.
## 5 A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception.
## 6 A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception.
## 7 A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception.
## 8 A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception.
## 9 A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception.
## 10 A fast-paced Cold War–style spy thriller set in modern Moscow, following CIA operatives navigating betrayal and deception.
## Avg.Stars #.Reviews Price Genre Review.Stars Headline
## 1 4.4 699 14.99 Spy Thriller NA <NA>
## 2 4.4 699 14.99 Spy Thriller 5 Authentic spy craft
## 3 4.4 699 14.99 Spy Thriller 5 Gripping from start
## 4 4.4 699 14.99 Spy Thriller 5 Great modern espionage
## 5 4.4 699 14.99 Spy Thriller 5 Complex and thrilling
## 6 4.4 699 14.99 Spy Thriller 5 Top-tier thriller
## 7 4.4 699 14.99 Spy Thriller 3 Slow middle
## 8 4.4 699 14.99 Spy Thriller 3 Too many characters
## 9 4.4 699 14.99 Spy Thriller 2 Overly technical
## 10 4.4 699 14.99 Spy Thriller 2 Predictable twists
## Review.Text
## 1 <NA>
## 2 This novel pulled me in quickly with its tense atmosphere and well-developed spy narrative. The author did a great job balancing action with character development, which made the story feel both exciting and grounded. I especially enjoyed how the setting influenced the plot and added realism to the espionage elements. While a few sections were slower, the overall experience was engaging and rewarding. I would definitely recommend this to readers who enjoy modern political thrillers.
## 3 From the opening chapters, this book created a strong sense of suspense that carried through to the end. The storyline was layered and kept me guessing about character motives and hidden agendas. I appreciated the attention to detail in describing the intelligence work and the emotional stakes involved. Some scenes were more descriptive than necessary, but they helped establish mood. Overall, it was a satisfying and entertaining read.
## 4 I found this book to be a compelling take on contemporary espionage with plenty of twists and tension. The pacing worked well for most of the story, and the major plot turns felt earned rather than forced. The main characters were interesting enough to make me care about their decisions and consequences. There were moments where the narrative slowed down, but it never completely lost my interest. It stands out as a strong example of the genre.
## 5 This story offered a thoughtful mix of political intrigue and personal conflict. I liked how the author explored both the professional and emotional challenges faced by intelligence officers. The plot unfolded gradually, which allowed the suspense to build in a believable way. Some readers may prefer more nonstop action, but I appreciated the slower, more detailed approach. In the end, it delivered a solid and enjoyable thriller experience.
## 6 The novel successfully combined suspense, action, and character development into a cohesive story. The espionage elements felt researched and realistic, which made the stakes feel higher. I also enjoyed the way relationships between characters evolved over time instead of being rushed. A few subplots were less interesting than the main storyline, but they did not ruin the overall impact. This book met my expectations for a modern spy thriller.
## 7 This book had an interesting premise and a few moments that were genuinely engaging. However, the pacing felt uneven, with some sections dragging on longer than necessary. The characters were serviceable but not especially memorable, which made it harder to feel invested in their outcomes. While the espionage elements were intriguing, the story did not always hold my attention. Overall, it was an average read with both strengths and weaknesses.
## 8 There were parts of this novel that I enjoyed, particularly the setting and basic plot idea. At the same time, I found the execution inconsistent, with stretches of slow development followed by rushed scenes. The characters were not as fully developed as I hoped they would be. Some of the political or spy-related details were interesting, but others felt confusing. It was a decent read, but not one I would strongly recommend.
## 9 I wanted to enjoy this book because the premise sounded promising, but I struggled to stay interested. The pacing felt slow and uneven, which made it difficult to build suspense. The characters were thinly developed, and I did not feel connected to their motivations. Several plot points seemed predictable or underexplained. Compared to other books in this genre, this one fell short of my expectations.
## 10 This novel did not work for me despite its interesting concept. The story lacked momentum, and long sections felt unnecessary or repetitive. I found it hard to care about what happened to the characters because they were not well developed. The espionage elements were not as exciting as I hoped they would be. Overall, it was a disappointing reading experience.
## id
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
## 10 10
# Count how often each pair of words co-occurs within the same review
# (reviews are identified by `id`), most frequent pairs first.
word_pairs <- pairwise_count(words, word, id, sort = TRUE)
# Show the ten most frequent pairs.
word_pairs[seq_len(10), ]
## # A tibble: 10 × 3
## item1 item2 n
## <chr> <chr> <dbl>
## 1 enjoyed development 7
## 2 development enjoyed 7
## 3 action author 6
## 4 slower author 6
## 5 experience author 6
## 6 readers author 6
## 7 political author 6
## 8 author action 6
## 9 development action 6
## 10 enjoyed action 6
# Pairwise correlation between words across reviews (`id`), restricted to
# words that occur at least three times; strongest correlations first.
word_cors <- words %>%
  group_by(word) %>%
  filter(n() >= 3) %>%
  pairwise_cor(item = word, feature = id, sort = TRUE)
# Show the fifteen strongest correlations.
word_cors[seq_len(15), ]
## # A tibble: 15 × 3
## item1 item2 correlation
## <chr> <chr> <dbl>
## 1 atmosphere pulled 1
## 2 job pulled 1
## 3 balancing pulled 1
## 4 grounded pulled 1
## 5 influenced pulled 1
## 6 added pulled 1
## 7 rewarding pulled 1
## 8 thrillers pulled 1
## 9 pulled atmosphere 1
## 10 job atmosphere 1
## 11 balancing atmosphere 1
## 12 grounded atmosphere 1
## 13 influenced atmosphere 1
## 14 added atmosphere 1
## 15 rewarding atmosphere 1
# Words whose strongest correlates are shown, one facet per word.
focus_words <- c("suspense", "engaging", "characters")
# "suspense" and "characters" are listed in `custom`, so they are removed
# before `word_cors` is computed and could never appear in this plot. Rebuild
# the correlations here with the focus words exempted from the custom stop
# list; everything else mirrors the way `word_cors` is derived.
focus_cors <- reviews %>%
  unnest_tokens(word, Review.Text) %>%
  filter(!is.na(word)) %>%
  anti_join(stop_words, by = "word") %>%
  filter(!word %in% base::setdiff(custom$word, focus_words)) %>%
  group_by(word) %>%
  filter(n() >= 3) %>%
  pairwise_cor(word, id, sort = TRUE)
# For each focus word keep its six highest correlations (ties are kept) and
# draw them as horizontal bars.
focus_cors %>%
  filter(item1 %in% focus_words) %>%
  group_by(item1) %>%
  slice_max(correlation, n = 6) %>%
  ungroup() %>%
  mutate(item2 = reorder(item2, correlation)) %>%
  ggplot(aes(item2, correlation)) +
  geom_col() +
  facet_wrap(~ item1, scales = "free") +
  coord_flip()

# Fix the RNG seed before plotting so the figure is reproducible.
set.seed(2016)
# Network of word correlations: discard correlations of 0.85 or above, keep
# the 40 strongest of the remainder (change `n` to control density), and draw
# them with edge opacity mapped to the correlation.
word_cors %>%
  filter(correlation < 0.85) %>%
  slice_max(order_by = correlation, n = 40) %>%
  graph_from_data_frame() %>%
  ggraph(layout = "fr") +
  geom_edge_link(aes(edge_alpha = correlation), show.legend = FALSE) +
  geom_node_point(size = 1, color = "lightblue") +
  geom_node_text(aes(label = name), size = 2, repel = TRUE) +
  theme_void()
