# Load libraries
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Count the words in each element of a character vector.
#
# A word is a maximal run of word characters (regex class \w: letters, digits
# and underscore); whitespace and punctuation only act as separators.
#
# The previous implementation counted runs of NON-word characters and added
# one. That over-counted whenever the text ended in punctuation
# ("Jesus wept." was reported as 3 words), returned 2 for an empty string, and
# raised an error on zero-length input because sapply() then returns an empty
# list. Output listings recorded in this script that were produced with the
# old counting will therefore show larger word counts.
#
# Args:
#   x: Character vector (or an object coercible to one) of texts.
#
# Returns:
#   Integer vector, one count per element of x; NA where x is NA.
n.words <- function(x) {
  word.runs <- gregexpr("\\w+", x)
  # gregexpr() signals "no match" with a single -1, so count only the real
  # (positive) match positions instead of taking the vector length.
  vapply(word.runs, function(starts) sum(starts > 0L), integer(1))
}
# Open the scriptures SQLite database.
# The database file can be downloaded from http://scriptures.nephi.org
scriptures.db <- src_sqlite(path = "~/Downloads/lds-scriptures-3.0/sqlite3/scriptures.db")

# Order in which the volumes should appear; used as the factor levels below
volume.names <- c(
  "Old Testament",
  "New Testament",
  "Book of Mormon",
  "Doctrine and Covenants",
  "Pearl of Great Price"
)

# Materialise the "scriptures" table as a data frame
scriptures <- as.data.frame(tbl(scriptures.db, "scriptures"))

# Per-verse word count and character count
scriptures <- mutate(
  scriptures,
  num.words = n.words(scripture_text),
  num.char = nchar(scripture_text)
)

# Volume title as an ordered factor, plus a copy with the level order reversed
scriptures <- mutate(
  scriptures,
  volume_title = factor(volume_title, levels = volume.names, ordered = TRUE),
  volume_title.rev = factor(volume_title, levels = rev(volume.names), ordered = TRUE)
)
# The 15 verses with the fewest words, shown together with their text
scriptures %>%
  select(verse_title, num.words, num.char, scripture_text) %>%
  arrange(num.words) %>%
  slice(seq_len(15))
## verse_title num.words num.char scripture_text
## 1 John 11:35 3 11 Jesus wept.
## 2 1 Thessalonians 5:16 3 17 Rejoice evermore.
## 3 1 Chronicles 1:1 4 19 ADAM, Sheth, Enosh,
## 4 1 Chronicles 1:2 4 25 Kenan, Mahalaleel, Jered,
## 5 1 Chronicles 1:3 4 27 Henoch, Methuselah, Lamech,
## 6 1 Chronicles 1:24 4 23 Shem, Arphaxad, Shelah,
## 7 1 Chronicles 1:25 4 17 Eber, Peleg, Reu,
## 8 1 Chronicles 1:26 4 20 Serug, Nahor, Terah,
## 9 Ezra 10:35 4 26 Benaiah, Bedeiah, Chelluh,
## 10 Ezra 10:36 4 27 Vaniah, Meremoth, Eliashib,
## 11 Ezra 10:40 4 29 Machnadebai, Shashai, Sharai,
## 12 Nehemiah 10:2 4 27 Seraiah, Azariah, Jeremiah,
## 13 Nehemiah 10:3 4 27 Pashur, Amariah, Malchijah,
## 14 Nehemiah 10:4 4 28 Hattush, Shebaniah, Malluch,
## 15 Nehemiah 10:5 4 25 Harim, Meremoth, Obadiah,
# The 15 verses with the most words (largest word count first)
scriptures %>%
  select(verse_title, num.words, num.char) %>%
  arrange(desc(num.words)) %>%
  slice(seq_len(15))
## verse_title num.words num.char
## 1 Doctrine and Covenants 132:19 216 1138
## 2 Joseph Smith--History 1:28 213 1167
## 3 Doctrine and Covenants 128:18 210 1138
## 4 Doctrine and Covenants 135:3 195 1038
## 5 Doctrine and Covenants 127:1 178 924
## 6 Joseph Smith--History 1:64 167 938
## 7 Doctrine and Covenants 135:7 166 894
## 8 Joseph Smith--History 1:60 162 855
## 9 Joseph Smith--History 1:25 158 750
## 10 Joseph Smith--History 1:20 157 817
## 11 Doctrine and Covenants 127:2 155 757
## 12 Doctrine and Covenants 132:7 154 838
## 13 Joseph Smith--History 1:56 151 802
## 14 Joseph Smith--History 1:61 151 851
## 15 Doctrine and Covenants 77:12 149 781
# The 15 verses with the most characters (largest character count first)
scriptures %>%
  select(verse_title, num.words, num.char) %>%
  arrange(desc(num.char)) %>%
  slice(seq_len(15))
## verse_title num.words num.char
## 1 Joseph Smith--History 1:28 213 1167
## 2 Doctrine and Covenants 128:18 210 1138
## 3 Doctrine and Covenants 132:19 216 1138
## 4 Doctrine and Covenants 135:3 195 1038
## 5 Joseph Smith--History 1:64 167 938
## 6 Doctrine and Covenants 127:1 178 924
## 7 Doctrine and Covenants 135:7 166 894
## 8 Joseph Smith--History 1:60 162 855
## 9 Joseph Smith--History 1:61 151 851
## 10 Doctrine and Covenants 132:7 154 838
## 11 Doctrine and Covenants 128:8 146 832
## 12 Joseph Smith--History 1:6 144 831
## 13 Joseph Smith--History 1:20 157 817
## 14 Joseph Smith--History 1:56 151 802
## 15 Doctrine and Covenants 77:12 149 781
# Words per verse per volume: one violin per volume showing the distribution
# of num.words. The reversed-level factor is used on the x axis, which
# coord_flip() then draws as the vertical axis.
ggplot(
  scriptures,
  aes(x = volume_title.rev, y = num.words, fill = volume_title.rev)
) +
  geom_violin() +
  coord_flip() +
  labs(x = NULL, y = "Number of words per verse") +
  scale_fill_manual(
    values = c("#ECD078", "#D95B43", "#C02942", "#542437", "#53777A"),
    # Hide the fill legend. The logical form guide = FALSE is deprecated in
    # ggplot2; the string "none" is the supported spelling.
    guide = "none"
  ) +
  theme_bw()
