Popularity of Biblical Names in America 1880–2017

Tyler Oldham

9/21/2021

Introduction

This report will visualize and contextualize the popularity of first names appearing in the Revised English Bible compared to all first names of newborns in America from 1880 to 2017. According to Pew Research, religiosity in America is at an all-time low. This report hypothesizes that the proportion of biblical names in America will decrease over time to reflect the changing rates of religiousness.

While the Christian Bible is used to create the dataset, this report includes names found in all Abrahamic religions and makes no effort to analyze the popularity of any specific religion.

Libraries

library(tidyverse)
library(rvest)
library(dplyr)
library(lubridate)
library(scales)
library(ggthemes)

Datasets

This report uses two primary datasets: First names of babies born in America and first names mentioned in the Bible.

Babynames

The babynames dataset is a package for R created by Hadley Wickham using data from the Social Security Administration. The package contains a table of ~2,000,000 instances of names, total count, and proportion of each name by gender per year.

library(babynames)

Names in the Christian Bible

The biblical names dataset is generated from web scraping this comprehensive table of names appearing in the Bible. The source contains variables for name, gender, verse, and alternative names. The script uses the rvest package to scrape and compile the data.

To start, I created a data frame from each of the four pages of the source and merged them into one.

# Scrape one page of the paginated biblical-names list.
#
# url: address of a single page of the list.
# Returns a data frame built from the fourth <table> node on that page
# (presumably the name listing -- confirm against the live page layout).
scrape_bible_page <- function(url) {
  page_tables <- html_nodes(read_html(url), "table")
  # `[4]` keeps a one-element node set, so html_table() returns a one-element
  # list, which as.data.frame() flattens into a plain data frame.
  as.data.frame(html_table(page_tables[4], fill = TRUE))
}

# Collect all four pages of the list. The first page has no page suffix;
# pages 2-4 append "/2", "/3" and "/4" to the same base address.
bible_base_url <- "https://www.behindthename.com/namesakes/list/biblical/alpha"
bible_page_urls <- c(bible_base_url, paste0(bible_base_url, "/", 2:4))
bible_pages <- lapply(bible_page_urls, scrape_bible_page)

# Merge the four pages, in page order, into a single data frame.
bibleMerged <- do.call(rbind, bible_pages)

# Set column names. The fifth column is named "Testament" here and is
# filled in by a later step.
colnames(bibleMerged)[1:5] <- c(
  "Name", "Gender", "Verse", "Other Names", "Testament"
)

# Adjust dataframe: drop the first row of the merged table.
# NOTE(review): presumably a header/filler row scraped from page 1 -- confirm
# that pages 2-4 do not contribute an equivalent row further down.
bibleMerged <- bibleMerged[-1, ]

Then, I created another variable for the Testament origin of the name.

# Label each name "New" when its verse cites a New Testament book and "Old"
# otherwise. Matching is a case-sensitive substring search on the book
# abbreviation inside the Verse column.
#
# "Chr" is deliberately NOT in this list: it abbreviates 1-2 Chronicles, which
# are Old Testament books, so including it labelled those names "New".
# Cor, Gal, Thess, Heb, Jas and Pet are New Testament books the pattern used to
# miss. None of these strings occurs inside a standard Old Testament book
# abbreviation.
# NOTE(review): assumes the source uses these abbreviations -- verify against
# the scraped Verse values. Correcting the split changes the Old/New counts, so
# the by-Testament charts and their commentary should be re-checked.
new_testament_books <- c(
  "Matt", "Mark", "Luke", "John", "Acts", "Rom", "Cor", "Gal", "Eph",
  "Phlp", "Col", "Thess", "Tim", "Titus", "Phlm", "Heb", "Jas", "Pet",
  "Jude", "Rev"
)
new_testament_pattern <- paste(new_testament_books, collapse = "|")

bibleMerged$Testament <- ifelse(
  grepl(new_testament_pattern, bibleMerged$Verse),
  "New",
  "Old"
)

Method

Create base subset of only biblical name birth data.

# Inner join: keep only the babynames rows whose `name` matches a biblical
# `Name` (all.x = FALSE drops biblical names with no birth records).
# NOTE(review): the join key is the name alone, so a name listed more than once
# in bibleMerged would repeat its birth rows -- confirm names are unique.
babynamesBible <- merge(
  x = bibleMerged,
  y = babynames,
  by.x = "Name",
  by.y = "name",
  all.x = FALSE
)

Filter dataframes to be plotted

# Each table below is summarised per year, then has its numeric `year` turned
# into a Date; `truncated = 2L` lets ymd() parse a bare year with no month/day.

# Total number of births with a biblical name, per year.
yearTotals <- babynamesBible %>%
  group_by(year) %>%
  summarise(n = sum(n))
yearTotals$year <- lubridate::ymd(yearTotals$year, truncated = 2L)

# Mean of the per-name `prop` values of biblical names, per year.
yearProps <- babynamesBible %>%
  group_by(year) %>%
  summarise(prop = mean(prop))
yearProps$year <- lubridate::ymd(yearProps$year, truncated = 2L)

# Total number of births with a biblical name, per year and Testament origin.
yearTotalsTestament <- babynamesBible %>%
  group_by(year, Testament) %>%
  summarise(n = sum(n)) %>%
  arrange(year, Testament)
yearTotalsTestament$year <- lubridate::ymd(
  yearTotalsTestament$year,
  truncated = 2L
)

# Mean of the per-name `prop` values, per year and Testament origin.
yearPropsTestament <- babynamesBible %>%
  group_by(year, Testament) %>%
  summarise(prop = mean(prop)) %>%
  arrange(year, Testament)
yearPropsTestament$year <- lubridate::ymd(
  yearPropsTestament$year,
  truncated = 2L
)

Results

Annotations have been added to show historical context. There is no proven correlation between the events and the data. Source: Association of Religion Data Archives: Prominent Religious Events and People in American History.

All Biblical Names

Sum of All Biblical Names by Year

# Penalise scientific notation heavily so numbers print in fixed notation.
options(scipen = 999)

# Base line chart of yearly biblical-name births; the y axis is labelled in
# millions ("m"). The theme() tweaks come after theme_economist_white() so the
# complete theme does not overwrite them.
allNamesGraph <- ggplot(yearTotals, aes(year, n)) +
  geom_line() +
  scale_y_continuous(
    labels = label_number(scale = 1e-6, suffix = "m", accuracy = 1)
  ) +
  labs(
    title = "All Births with Biblical Names in America 1880-2017",
    x = "Year",
    y = "Births in Year"
  ) +
  theme_economist_white(gray_bg = FALSE) +
  theme(
    axis.title = element_text(margin = margin(t = 10, r = 10, b = 0, l = 10)),
    plot.title = element_text(margin = margin(0, 0, 30, 0))
  )

# Annotation helpers, defined in this chunk so it runs on its own.
# event_marker(): a dot at `y` with a vertical rule spanning ymin..ymax.
event_marker <- function(date, y, ymin, ymax) {
  annotate(
    "pointrange",
    x = as.Date(date), y = y, ymin = ymin, ymax = ymax,
    color = "black", size = 0.3, alpha = 0.6
  )
}
# event_label(): the caption text placed at (date, y).
event_label <- function(date, y, label) {
  annotate(
    "text",
    x = as.Date(date), y = y, label = label,
    color = "black", size = 3
  )
}

# Annotate the totals chart with historical events. Each marker's `y` is a
# hard-coded value; the rule runs from there towards the caption.
allNamesGraphAnnotated <- allNamesGraph +
  # Scofield
  event_marker("1909-01-01", y = 254354, ymin = 254354, ymax = 2100000) +
  event_label("1899-01-01", y = 2200000, label = "Scofield Bible Published") +
  # WW1
  event_marker("1914-01-01", y = 830145, ymin = 830145, ymax = 1700000) +
  event_label("1920-01-01", y = 1800000, label = "Start of WW1") +
  # Radio Evangelism
  event_marker("1933-01-01", y = 998112, ymin = 600000, ymax = 998112) +
  event_label("1943-01-01", y = 500000, label = "Radio Evangelism Boom") +
  # WW2
  event_marker("1939-01-01", y = 1184350, ymin = 1184350, ymax = 2400000) +
  event_label("1934-01-01", y = 2500000, label = "Start of WW2") +
  # Cold War: a horizontal span with a dot at each end
  annotate(
    "segment",
    x = as.Date("1947-01-01"), xend = as.Date("1991-01-01"),
    y = 750000, yend = 750000,
    color = "black", alpha = 0.6
  ) +
  annotate(
    "point",
    x = as.Date("1947-01-01"), y = 750000,
    color = "black", alpha = 0.6
  ) +
  annotate(
    "point",
    x = as.Date("1991-01-01"), y = 750000,
    color = "black", alpha = 0.6
  ) +
  event_label("1967-01-01", y = 900000, label = "Cold War") +
  # Civil Rights Movement
  event_marker("1963-01-01", y = 2556743, ymin = 2556743, ymax = 3100000) +
  event_label("1968-01-01", y = 3200000, label = "Civil Rights Movement") +
  # Nixon Resigns
  event_marker("1974-01-01", y = 2058451, ymin = 1600000, ymax = 2058451) +
  event_label("1974-01-01", y = 1500000, label = "Nixon Resignation") +
  # 9/11
  event_marker("2001-01-01", y = 2113823, ymin = 2113823, ymax = 2600000) +
  event_label("2001-01-01", y = 2700000, label = "9/11")
allNamesGraphAnnotated

The frequency of births with biblical names has generally risen since 1880, though this is likely due to population growth. Note the significant drop in frequency after the end of the Cold War, possibly due to religious/cultural disillusionment.

Proportion of All Biblical Names by Year

# Base line chart of the yearly mean proportion of biblical names; the y axis
# is shown as a percentage (value x 100, one decimal place). The theme() tweaks
# come after theme_economist_white() so the complete theme does not overwrite
# them.
propNamesGraph <- ggplot(yearProps, aes(year, prop)) +
  geom_line() +
  scale_y_continuous(
    labels = label_number(suffix = "%", scale = 1e+2, accuracy = 0.1)
  ) +
  labs(
    title = "Proportion of Biblical Names in America 1880-2017",
    x = "Year",
    y = "Proportion"
  ) +
  theme_economist_white(gray_bg = FALSE) +
  theme(
    axis.title = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
    plot.title = element_text(margin = margin(0, 0, 30, 0))
  )

# Annotation helpers, defined in this chunk so it runs on its own.
# event_marker(): a dot at `y` with a vertical rule spanning ymin..ymax.
event_marker <- function(date, y, ymin, ymax) {
  annotate(
    "pointrange",
    x = as.Date(date), y = y, ymin = ymin, ymax = ymax,
    color = "black", size = 0.3, alpha = 0.6
  )
}
# event_label(): the caption text placed at (date, y).
event_label <- function(date, y, label) {
  annotate(
    "text",
    x = as.Date(date), y = y, label = label,
    color = "black", size = 3
  )
}

# Annotate the proportion chart with historical events. Each marker's `y` is a
# hard-coded value; the rule runs from there towards the caption.
# NOTE(review): the result is stored in `allNamesGraphAnnotated`, the same name
# the totals chart uses; the name is kept so existing references still work.
allNamesGraphAnnotated <- propNamesGraph +
  # Scofield
  event_marker(
    "1909-01-01", y = 0.0032558931, ymin = 0.0019, ymax = 0.0032558931
  ) +
  event_label("1899-01-01", y = 0.0018, label = "Scofield Bible Published") +
  # WW1
  event_marker(
    "1914-01-01", y = 0.0024904405, ymin = 0.0024904405, ymax = 0.00365
  ) +
  event_label("1919-01-01", y = 0.0038, label = "Start of WW1") +
  # Radio Evangelism
  event_marker(
    "1933-01-01", y = 0.0020026782, ymin = 0.0013, ymax = 0.0020026782
  ) +
  event_label("1933-01-01", y = 0.0012, label = "Radio Evangelism Boom") +
  # WW2
  event_marker(
    "1939-01-01", y = 0.0021861022, ymin = 0.0021861022, ymax = 0.0031
  ) +
  event_label("1939-01-01", y = 0.00325, label = "Start of WW2") +
  # Cold War: a horizontal span with a dot at each end
  annotate(
    "segment",
    x = as.Date("1947-01-01"), xend = as.Date("1991-01-01"),
    y = 0.00415, yend = 0.00415,
    color = "black", alpha = 0.6
  ) +
  annotate(
    "point",
    x = as.Date("1947-01-01"), y = 0.00415,
    color = "black", alpha = 0.6
  ) +
  annotate(
    "point",
    x = as.Date("1991-01-01"), y = 0.00415,
    color = "black", alpha = 0.6
  ) +
  event_label("1967-01-01", y = 0.0043, label = "Cold War") +
  # Civil Rights Movement
  event_marker(
    "1963-01-01", y = 0.0021749466, ymin = 0.0021749466, ymax = 0.00365
  ) +
  event_label("1968-01-01", y = 0.0038, label = "Civil Rights Movement") +
  # Nixon Resigns: the caption sits above the line, so the rule runs upward
  # from the data value (ymin) to just under the caption (ymax). These two
  # bounds were previously swapped (ymin > ymax).
  event_marker(
    "1974-01-01", y = 0.0016972808, ymin = 0.0016972808, ymax = 0.00265
  ) +
  event_label("1984-01-01", y = 0.0028, label = "Nixon Resignation") +
  # 9/11
  event_marker(
    "2001-01-01", y = 0.0007947869, ymin = 0.0007947869, ymax = 0.00165
  ) +
  event_label("2001-01-01", y = 0.0018, label = "9/11")
allNamesGraphAnnotated

The proportion of births with biblical names in comparison to all births has decreased over time. This graph seems to follow an inverse trend of the sum graph. The line follows a trend very closely, except for the dip from 1910 to 1950. This dip and the spike from Figure 1 seem to coincide, which would show that the spike is very likely due to population growth.

Separate Data by Testament

These graphs are similar to those in the previous section, with an added variable for testament origin. New Testament names are the red line, while Old Testament names are the blue line. Annotations have been removed for readability.

Sum of Biblical Names by Year by Testament

# Line chart of yearly biblical-name births, one coloured line per Testament
# origin; the y axis is labelled in millions ("m") with one decimal place.
allNamesTestamentGraph <- ggplot(
  yearTotalsTestament,
  aes(year, n, group = Testament)
) +
  geom_line(aes(color = Testament)) +
  scale_y_continuous(
    labels = label_number(scale = 1e-6, suffix = "m", accuracy = 0.1)
  ) +
  labs(
    title = "All Births with Biblical Names in America by Testament Origin 1880-2017",
    x = "Year",
    y = "Births in Year"
  ) +
  theme_economist_white(gray_bg = FALSE) +
  theme(
    axis.title = element_text(margin = margin(t = 10, r = 10, b = 0, l = 10)),
    plot.title = element_text(margin = margin(0, 0, 30, 0), size = 14),
    legend.background = element_rect(fill = "gray90"),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 10)
  )

allNamesTestamentGraph

Both lines begin and remain close together from 1880 to 1910, when a spike in New Testament names causes the two lines to diverge. The two then follow their respective trends until the 1990s, when the lines start to converge, finally meeting in the 2000s.

Proportion of Biblical Names by Year by Testament

# Line chart of the yearly mean proportion of biblical names, one coloured line
# per Testament origin; the y axis is shown as a percentage (value x 100).
#
# The former `ylim(0, 3000000)` is removed. It was a count-scale limit left
# over from the totals chart, and the scale_y_continuous() added afterwards
# replaced it anyway, so it had no effect beyond a "scale already present"
# message. The y scale is now declared once, in the chain.
# NOTE(review): this reuses the name `allNamesTestamentGraph` from the totals
# chart; the name is kept so existing references still work.
allNamesTestamentGraph <- ggplot(
  yearPropsTestament,
  aes(year, prop, group = Testament)
) +
  geom_line(aes(color = Testament)) +
  scale_y_continuous(
    labels = label_number(suffix = "%", scale = 1e+2, accuracy = 0.1)
  ) +
  labs(
    title = "Proportion of Births with Biblical Names in America by Testament Origin 1880-2017",
    x = "Year",
    y = "Proportion"
  ) +
  theme_economist_white(gray_bg = FALSE) +
  theme(
    axis.title = element_text(margin = margin(t = 10, r = 10, b = 0, l = 10)),
    plot.title = element_text(margin = margin(0, 0, 30, 0), size = 14),
    legend.background = element_rect(fill = "gray90"),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 10)
  )

allNamesTestamentGraph

Unlike the previous graph, the two lines begin with a significant difference between their proportions. However, they continue to follow their respective trends until they converge in the 2000s. While the proportion of New Testament names falls by ~0.5%, the proportion of Old Testament names falls by only ~0.05% from 1880 to 2017.

Most Popular Names by Gender

Compile a frequency and a proportion dataframe sorted by gender for every year combined, each containing the top ten most popular names.

# Keep the rows of a per-name summary that belong to one gender code, sort
# them by `total` in descending order, and return the first ten.
top_ten_for_gender <- function(name_totals, gender_code) {
  name_totals %>%
    filter(Gender == gender_code) %>%
    arrange(desc(total)) %>%
    head(10)
}

# Frequency: total births per name across all years, top ten per gender.
popNamesSumM <- babynamesBible %>%
  group_by(Name, Gender) %>%
  summarize(total = sum(n)) %>%
  top_ten_for_gender("m")

popNamesSumF <- babynamesBible %>%
  group_by(Name, Gender) %>%
  summarize(total = sum(n)) %>%
  top_ten_for_gender("f")

popNamesSumMerge <- rbind(popNamesSumM, popNamesSumF)

# Proportion: mean yearly `prop` per name across all years, top ten per gender.
popNamesPropM <- babynamesBible %>%
  group_by(Name, Gender) %>%
  summarize(total = mean(prop)) %>%
  top_ten_for_gender("m")

popNamesPropF <- babynamesBible %>%
  group_by(Name, Gender) %>%
  summarize(total = mean(prop)) %>%
  top_ten_for_gender("f")

popNamesPropMerge <- rbind(popNamesPropM, popNamesPropF)

Most Popular Names by Frequency

Figure 5

# Horizontal bar chart of the ten most frequent biblical names for each gender,
# one facet per gender; the x axis is labelled in millions ("m"). Names are
# ordered by their total so the largest bar sits at the top of each facet.
# The title previously read "Most Popular Male Biblical Names" although the
# chart facets over both genders.
popNamesSumGraph <- ggplot(
  popNamesSumMerge,
  aes(total, reorder(Name, total), fill = Gender)
) +
  geom_col() +
  scale_x_continuous(
    labels = label_number(suffix = "m", scale = 1e-6, accuracy = 1)
  ) +
  labs(
    title = "Most Popular Biblical Names by Gender",
    x = "Total Births with Name",
    y = "Name"
  ) +
  theme_economist_white(horizontal = FALSE, gray_bg = FALSE) +
  theme(
    legend.position = "none",
    panel.border = element_rect(color = "black", size = 1, fill = NA),
    axis.title = element_text(margin = margin(t = 10, r = 10, b = 0, l = 10)),
    plot.title = element_text(margin = margin(0, 0, 30, 0), size = 14)
  ) +
  # free_y lets each facet list only its own ten names.
  facet_wrap(~Gender, scales = "free_y")
popNamesSumGraph

Most Popular Names by Proportion

Figure 6

# Horizontal bar chart of the ten biblical names with the highest mean
# proportion for each gender, one facet per gender; the x axis is shown as a
# percentage (value x 100, one decimal place).
# The title previously read "Most Popular Male Biblical Names" although the
# chart facets over both genders.
popNamesPropGraph <- ggplot(
  popNamesPropMerge,
  aes(total, reorder(Name, total), fill = Gender)
) +
  geom_col() +
  scale_x_continuous(
    labels = label_number(suffix = "%", scale = 1e+2, accuracy = 0.1)
  ) +
  labs(
    title = "Most Popular Biblical Names by Gender",
    x = "Proportion",
    y = "Name"
  ) +
  theme_economist_white(horizontal = FALSE, gray_bg = FALSE) +
  theme(
    legend.position = "none",
    panel.border = element_rect(color = "black", size = 1, fill = NA),
    axis.title = element_text(margin = margin(t = 10, r = 10, b = 0, l = 10)),
    plot.title = element_text(margin = margin(0, 0, 30, 0), size = 14)
  ) +
  # free_y lets each facet list only its own ten names.
  facet_wrap(~Gender, scales = "free_y")
popNamesPropGraph

Conclusion

The hypothesis that the proportion of Biblical names in America is decreasing over time is confirmed.