# Load libraries
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)


# Helpful function
n.words <- function(x) {
  sapply(gregexpr("\\W+", x), length) + 1
}


# Load scriptures from SQLite database
# Download from http://scriptures.nephi.org
scriptures.db <- src_sqlite("~/Downloads/lds-scriptures-3.0/sqlite3/scriptures.db")

# Calculate verse length and word counts
volume.names <- c("Old Testament", "New Testament", "Book of Mormon",
                  "Doctrine and Covenants", "Pearl of Great Price")

scriptures <- tbl(scriptures.db, "scriptures") %>%
  as.data.frame() %>%
  mutate(num.words = n.words(scripture_text),
         num.char = nchar(scripture_text)) %>%
  mutate(volume_title = factor(volume_title, levels=volume.names, ordered=TRUE),
         volume_title.rev = factor(volume_title, levels=rev(volume.names), ordered=TRUE))

# Shortest verses
scriptures %>%
  arrange(num.words) %>%
  select(verse_title, num.words, num.char, scripture_text) %>%
  slice(1:15)
##             verse_title num.words num.char                scripture_text
## 1            John 11:35         3       11                   Jesus wept.
## 2  1 Thessalonians 5:16         3       17             Rejoice evermore.
## 3      1 Chronicles 1:1         4       19           ADAM, Sheth, Enosh,
## 4      1 Chronicles 1:2         4       25     Kenan, Mahalaleel, Jered,
## 5      1 Chronicles 1:3         4       27   Henoch, Methuselah, Lamech,
## 6     1 Chronicles 1:24         4       23       Shem, Arphaxad, Shelah,
## 7     1 Chronicles 1:25         4       17             Eber, Peleg, Reu,
## 8     1 Chronicles 1:26         4       20          Serug, Nahor, Terah,
## 9            Ezra 10:35         4       26    Benaiah, Bedeiah, Chelluh,
## 10           Ezra 10:36         4       27   Vaniah, Meremoth, Eliashib,
## 11           Ezra 10:40         4       29 Machnadebai, Shashai, Sharai,
## 12        Nehemiah 10:2         4       27   Seraiah, Azariah, Jeremiah,
## 13        Nehemiah 10:3         4       27   Pashur, Amariah, Malchijah,
## 14        Nehemiah 10:4         4       28  Hattush, Shebaniah, Malluch,
## 15        Nehemiah 10:5         4       25     Harim, Meremoth, Obadiah,
# Longest verses by word count
scriptures %>%
  arrange(desc(num.words)) %>%
  select(verse_title, num.words, num.char) %>%
  slice(1:15)
##                      verse_title num.words num.char
## 1  Doctrine and Covenants 132:19       216     1138
## 2     Joseph Smith--History 1:28       213     1167
## 3  Doctrine and Covenants 128:18       210     1138
## 4   Doctrine and Covenants 135:3       195     1038
## 5   Doctrine and Covenants 127:1       178      924
## 6     Joseph Smith--History 1:64       167      938
## 7   Doctrine and Covenants 135:7       166      894
## 8     Joseph Smith--History 1:60       162      855
## 9     Joseph Smith--History 1:25       158      750
## 10    Joseph Smith--History 1:20       157      817
## 11  Doctrine and Covenants 127:2       155      757
## 12  Doctrine and Covenants 132:7       154      838
## 13    Joseph Smith--History 1:56       151      802
## 14    Joseph Smith--History 1:61       151      851
## 15  Doctrine and Covenants 77:12       149      781
# Longest verses by verse length
scriptures %>%
  arrange(desc(num.char)) %>%
  select(verse_title, num.words, num.char) %>%
  slice(1:15)
##                      verse_title num.words num.char
## 1     Joseph Smith--History 1:28       213     1167
## 2  Doctrine and Covenants 128:18       210     1138
## 3  Doctrine and Covenants 132:19       216     1138
## 4   Doctrine and Covenants 135:3       195     1038
## 5     Joseph Smith--History 1:64       167      938
## 6   Doctrine and Covenants 127:1       178      924
## 7   Doctrine and Covenants 135:7       166      894
## 8     Joseph Smith--History 1:60       162      855
## 9     Joseph Smith--History 1:61       151      851
## 10  Doctrine and Covenants 132:7       154      838
## 11  Doctrine and Covenants 128:8       146      832
## 12     Joseph Smith--History 1:6       144      831
## 13    Joseph Smith--History 1:20       157      817
## 14    Joseph Smith--History 1:56       151      802
## 15  Doctrine and Covenants 77:12       149      781
# Words per verse per volume
ggplot(scriptures, aes(x=volume_title.rev, y=num.words, fill=volume_title.rev)) + 
  geom_violin() + 
  coord_flip() + 
  labs(x=NULL, y="Number of words per verse") + 
  scale_fill_manual(values=c("#ECD078", "#D95B43", "#C02942", "#542437", "#53777A"), 
                    guide=FALSE) + 
  theme_bw()