pacman::p_load(pacman, tidytext, dplyr, tidyr, ggplot2, readr, wordcloud, RColorBrewer, udpipe)
# Bigram frequency analysis of the Douay-Rheims Bible ----
#
# Reads the verse table, tokenises every verse into two-word sequences
# (bigrams), drops bigrams in which either word is a stop word, counts the
# remainder, and draws a bar chart of the most frequent bigrams plus a word
# cloud.

# Load the verse table and give its columns fixed names.
# NOTE(review): assumes the CSV has exactly four columns in the order
# book, chapter, verse, text -- confirm against the data file.
bible_data <- read_csv("Douay_Rheims_Bible2.csv")
colnames(bible_data) <- c("Book", "Chapter", "Verse", "Verse_Text")

# One row per bigram; the Book/Chapter/Verse columns are carried along.
bigrams <- bible_data %>%
  unnest_tokens(bigram, Verse_Text, token = "ngrams", n = 2)

# Split each bigram into its two words and keep only pairs in which neither
# word is a stop word. Verses with fewer than two words can produce an NA
# bigram; those are removed first so they are not later counted as the
# literal string "NA NA".
bigrams_separated <- bigrams %>%
  filter(!is.na(bigram)) %>%
  separate(bigram, into = c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% stop_words$word,
    !word2 %in% stop_words$word
  )

# Re-join the word pairs and count occurrences, most frequent first.
bigram_counts <- bigrams_separated %>%
  unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE)

# Horizontal bar chart of the 20 most frequent bigrams (ties are kept).
bigram_plot <- bigram_counts %>%
  slice_max(n, n = 20) %>%
  ggplot(aes(x = reorder(bigram, n), y = n)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Most Common Bigrams in the Douay-Rheims Bible",
    x = "Bigram",
    y = "Frequency"
  ) +
  theme_minimal()
# Print explicitly so the chart is also drawn when the script is source()d.
print(bigram_plot)

# Word cloud of up to 100 bigrams that occur at least twice. The seed is
# fixed so the layout is reproducible between runs.
set.seed(1234)
wordcloud(
  words = bigram_counts$bigram,
  freq = bigram_counts$n,
  min.freq = 2,
  max.words = 100,
  random.order = FALSE,
  rot.per = 0.1,
  scale = c(3.5, 0.75),
  colors = brewer.pal(8, "Dark2")
)
# Left-aligned bold title in the top margin of the word cloud.
mtext(
  "Word Cloud of Bigrams in the Douay-Rheims Bible",
  side = 3,
  adj = 0,
  line = 1,
  cex = 1,
  font = 2
)