# Load necessary libraries
pacman::p_load(pacman, tidytext, dplyr, tidyr, ggplot2, readr, topicmodels, gridExtra,
wordcloud, RColorBrewer, quanteda, quanteda.textstats, grid)
# Input location and transcript files ----
# `file_path` is the folder holding the CSVs; `files` names one CSV per
# episode. The number in parentheses at the end of each file name is the
# episode number, and the list runs from the last episode to the first
# (there is no entry for episode 6).
file_path <- "/cloud/project"
files <- c(
  "Children_of_the_Stones_Full_Circle(7).csv",
  "Children_of_the_Stones_Charmed_Circle(5).csv",
  "Children_of_the_Stones_Narrowing_Circle(4).csv",
  "Children_of_the_Stones_Serpent_in_the_Circle(3).csv",
  "Children_of_the_Stones_Circle_of_Fear(2).csv",
  "Children_of_the_Stones_Into_the_Circle(1).csv"
)
# Per-episode NRC sentiment analysis ----
# For every transcript file: read it, keep its text for the series-wide
# analysis, tokenise it, join the tokens to the NRC lexicon and build one bar
# chart. Both result containers are preallocated (one slot per file) so that
# nothing is grown with rbind() inside the loop.
episode_texts <- vector("list", length(files))
nrc_plots <- vector("list", length(files))

for (i in seq_along(files)) {
  # Extract episode info: strip the fixed prefix and the ".csv" suffix, then
  # separate the episode name from the number held in parentheses.
  episode_info <- gsub("Children_of_the_Stones_|\\.csv", "", files[i])
  episode_name <- trimws(gsub("\\(\\d+\\)", "", episode_info))
  episode_number <- gsub(".*\\((\\d+)\\).*", "\\1", episode_info)

  # Read the file.
  # NOTE(review): read_csv() treats the first row as column names by default,
  # and that name is overwritten just below. If the CSVs have no header row,
  # the first line of every transcript is being dropped — confirm.
  episode_data <- read_csv(
    file.path(file_path, files[i]),
    show_col_types = FALSE
  )
  colnames(episode_data) <- "text" # Ensure column consistency

  # Keep the raw text for the combined (series-wide) analysis.
  episode_texts[[i]] <- episode_data

  # Tokenize text into words and drop stop words.
  episode_tokens <- episode_data %>%
    unnest_tokens(word, text) %>%
    anti_join(stop_words, by = "word")

  # NRC sentiment counts for this episode. A word can carry several NRC
  # sentiments and can occur many times in the text, hence the explicit
  # many-to-many relationship.
  nrc_sentiments <- episode_tokens %>%
    inner_join(
      get_sentiments("nrc"),
      by = "word",
      relationship = "many-to-many"
    ) %>%
    count(sentiment, sort = TRUE) %>%
    mutate(percentage = n / sum(n) * 100)

  # Horizontal bar chart of the counts, labelled with each sentiment's share.
  # (Named `episode_plot` rather than `plot` so base::plot is not masked.)
  episode_plot <- ggplot(
    nrc_sentiments,
    aes(x = reorder(sentiment, n), y = n, fill = sentiment)
  ) +
    geom_col() +
    geom_text(aes(label = paste0(round(percentage, 1), "%")), hjust = 1) +
    coord_flip() +
    labs(
      title = paste("NRC Sentiments for", episode_name, episode_number),
      x = "Sentiment", y = "Count"
    ) +
    theme_minimal()

  # Store the plot in the list
  nrc_plots[[i]] <- episode_plot
}

# Combine the text of all episodes once, after the loop.
full_text <- bind_rows(episode_texts)
# Lay out the per-episode charts as one titled figure ----
# The charts are stored in the order they were created; flip that order
# before placing them in the grid.
nrc_plots <- rev(nrc_plots)

# Bold heading placed above the grid.
title_grob <- grid::textGrob(
  label = "NRC Sentiment Analysis by Episode: Children of the Stones; Analysis by Patrick Ford.",
  gp = grid::gpar(fontface = "bold", fontsize = 16)
)

# Smaller bold credit line placed below the grid.
caption_grob <- grid::textGrob(
  label = "Written by: Jeremy Burnham and Trevor Ray; Produced and Directed by: Peter Graham Scott (note - episode 6 is missing (Squaring the Circle))",
  gp = grid::gpar(fontface = "bold", fontsize = 10)
)

# Build the 2-row by 3-column layout with the heading and credit attached.
grid_with_title <- gridExtra::arrangeGrob(
  grobs = nrc_plots,
  ncol = 3,
  nrow = 2,
  top = title_grob,
  bottom = caption_grob
)

# Render the assembled layout on the current graphics device.
grid::grid.draw(grid_with_title)

# Series-wide word frequencies ----
# Tokenise the combined text into single words and drop stop words.
tokens <- full_text %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word")

# Count each remaining word, most frequent first.
word_counts <- tokens %>%
  count(word, sort = TRUE)

# Plot the 30 most common words. slice_max() names the ranking column
# explicitly, so dplyr no longer has to guess it (the superseded top_n(30)
# emitted the message "Selecting by n"). Ties at the cut-off are still kept.
word_counts %>%
  slice_max(n, n = 30) %>%
  ggplot(aes(x = reorder(word, n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Most Common Words in Children of the Stones; Series (note - episode 6 not included)",
    x = "Words", y = "Frequency"
  ) +
  theme_minimal()

# Bing sentiment analysis (whole series) ----
# Keep only the tokens that have an entry in the Bing lexicon; the join adds
# that entry's `sentiment` column.
bing_sentiments <- tokens %>%
  inner_join(get_sentiments("bing"), by = "word")

# Tally the matches per sentiment and express each tally as a percentage of
# all matched tokens.
bing_sentiments_count <- bing_sentiments %>%
  count(sentiment, sort = TRUE) %>%
  mutate(percentage = n / sum(n) * 100)

# One vertical bar per sentiment; the percentage label is anchored at the
# bar's value with vjust = 1.
bing_plot <- ggplot(
  bing_sentiments_count,
  aes(sentiment, n, fill = sentiment)
) +
  geom_col() +
  geom_text(aes(label = paste0(round(percentage, 1), "%")), vjust = 1) +
  ggtitle("Bing Sentiment Analysis: Children of the Stones; Series (note - episode 6 not included)") +
  xlab("Sentiment") +
  ylab("Count") +
  theme_minimal()
# NRC sentiment analysis (whole series) ----
# Join the series tokens to the NRC lexicon, tally the matches per sentiment
# and add each tally's percentage of all matches. The join is declared
# many-to-many because a word may map to several sentiments and may occur
# many times in the text.
nrc_sentiments_count <- tokens %>%
  inner_join(
    get_sentiments("nrc"),
    by = "word",
    relationship = "many-to-many"
  ) %>%
  count(sentiment, sort = TRUE) %>%
  mutate(percentage = n / sum(n) * 100)

# Horizontal bars ordered by count, each labelled with its percentage.
nrc_plot <- ggplot(
  nrc_sentiments_count,
  aes(reorder(sentiment, n), n, fill = sentiment)
) +
  geom_col() +
  geom_text(aes(label = paste0(round(percentage, 1), "%")), hjust = 1) +
  coord_flip() +
  ggtitle("NRC Sentiment Analysis: Children of the Stones; Series (note - episode 6 not included)") +
  xlab("Sentiment") +
  ylab("Count") +
  theme_minimal()
# Word cloud of the series vocabulary ----
# Seed the random number generator first so repeated runs start from the
# same random state.
set.seed(1234)

# Draw up to 150 words that occur at least 5 times, most frequent words
# placed first, with 10% of words rotated.
wordcloud(
  words = word_counts$word,
  freq = word_counts$n,
  scale = c(2.5, 0.5),
  min.freq = 5,
  max.words = 150,
  random.order = FALSE,
  rot.per = 0.1,
  colors = RColorBrewer::brewer.pal(n = 8, name = "Dark2")
)

# Left-aligned bold heading in the top margin of the cloud.
mtext(
  text = "Word Cloud: Children of the Stones; Series (note - episode 6 not included)",
  side = 3,
  line = 1,
  adj = 0,
  cex = 1,
  font = 2
)

# Textual complexity: Flesch-Kincaid readability ----
# Collapse every row of text into a single string so that one score is
# computed for the series as a whole.
full_text_string <- paste(full_text$text, collapse = " ")
readability <- textstat_readability(
  full_text_string,
  measure = "Flesch.Kincaid"
)

# textstat_readability() returns a data frame (a document id column plus one
# column per requested measure). Pasting the whole data frame produced one
# string per column, including a meaningless "... text1" line, so pull out
# the score column before building the message.
fk_score <- readability[["Flesch.Kincaid"]]
print(paste(
  "Flesch-Kincaid Readability Score; Series (note - episode 6 not included):",
  fk_score
))
## Score line recorded from an earlier run of this script:
## [1] "Flesch-Kincaid Readability Score; Series (note - episode 6 not included): 3.26994233566308"
# Draw the two series-wide sentiment charts in a single row: Bing first,
# NRC second.
gridExtra::grid.arrange(grobs = list(bing_plot, nrc_plot), nrow = 1)
