The original chart was created by FiveThirtyEight. We can recreate the chart by using R/ggplot2:
R code for replicating the chart:
# Load some packages:
# NOTE: the script deliberately does not call `rm(list = ls())`. That call
# deletes every object in the caller's workspace but does not detach packages
# or reset options, so it is destructive without giving a clean session.
# Restart R instead if a clean slate is needed.
library(mdsr)      # provides make_babynames_dist()
library(tidyverse) # dplyr verbs, ggplot2, stringr, the %>% pipe
# Load baby name data:
babynames <- make_babynames_dist()

# Filter Joseph: keep the rows for boys named Joseph and add `n_alive`, the
# yearly count (in thousands) scaled by the probability of still being alive.
joseph <- babynames %>%
  filter(name == "Joseph", sex == "M") %>%
  mutate(n_alive = count_thousands * alive_prob)

# Joseph in 1975: the single birth year that is highlighted in the chart.
joseph75 <- joseph %>%
  filter(year == 1975)
# Prepare colors for plotting:
col_text      <- "#0c94d6" # annotation text
col_highlight <- "#1f9cd9" # highlighted bar
col_normal    <- "#85c3e1" # regular bars
col_bgr       <- "#f0f0f0" # plot and panel background
col_grid      <- "#d3d3d3" # major grid lines
col_grey      <- "#5b5e5f" # spare dark grey
# Select font for graph:
library(extrafont) # Some problems: https://stackoverflow.com/questions/61204259/how-can-i-resolve-the-no-font-name-issue-when-importing-fonts-into-r-using-ext
# The "win" device only exists on Windows, so asking for it elsewhere fails.
# On other platforms fall back to extrafont's default device registration.
if (.Platform$OS.type == "windows") {
  loadfonts(device = "win", quiet = TRUE)
} else {
  loadfonts(quiet = TRUE)
}
my_font <- "Arial Black"
# Recreate the chart:
# X-axis labels, one per decade from 1900 to 2010. The years listed in
# `some_years` are written in full; every other decade is abbreviated to an
# apostrophe plus its last two digits (e.g. "' 10").
x_labels <- as.character(seq(1900, 2010, 10))
some_years <- c("1900", "2000")
x_labels <- ifelse(
  x_labels %in% some_years,
  x_labels,
  paste0("' ", substr(x_labels, 3, 4))
)

# Y-axis labels: 0, 10, 20, 30 and a top label carrying the "k" unit. The
# numbers are coerced to character because the vector also holds "40 k".
# The name `y_lables` (sic) is kept because the plot code refers to it.
y_lables <- c(seq(0, 30, 10), "40 k")
# Draw the chart. Bars show `n_alive` per birth year, the 1975 bar is drawn
# again in the highlight colour, and the dark line shows `count_thousands`.
joseph %>%
  ggplot() +
  geom_col(aes(x = year, y = n_alive), fill = col_normal, width = 0.77) +
  geom_col(
    data = joseph75,
    aes(x = year, y = n_alive),
    fill = col_highlight,
    width = 0.75
  ) +
  # `size` is kept for compatibility with older ggplot2; from ggplot2 3.4.0
  # onwards the preferred argument name for line width is `linewidth`.
  geom_line(aes(x = year, y = count_thousands), size = 1.5, color = "grey20") +
  scale_y_continuous(
    limits = c(0, 40),
    # Explicit breaks guarantee one break per entry of the five-element
    # `y_lables`, instead of relying on the automatically chosen breaks.
    breaks = seq(0, 40, 10),
    labels = y_lables,
    expand = c(0, 0)
  ) +
  scale_x_continuous(
    breaks = seq(1900, 2010, 10),
    labels = x_labels,
    limits = c(1900 - 5, 2011),
    expand = c(0, 0)
  ) +
  labs(
    title = "Age Distribution of American Boys Named Joseph",
    subtitle = "By year of birth"
  ) +
  # Alternative to the grid-drawn footer (left disabled, as in the original):
  #   labs(caption = c("FIVETHIRTYEIGHT", "SOURCE: SOCIAL SECURITY ADMINISTRATION"))
  #   theme(plot.caption = element_text(hjust = c(0, 1)))
  #   theme(plot.caption = element_text(family = my_font, color = "grey50", vjust = -5))
  #
  # Single annotations use annotate(): a geom_text()/geom_curve() layer with
  # constant x/y/label inherits the plot data and is drawn once per data row,
  # which stacks identical labels on top of each other.
  annotate(
    "text",
    x = 1927,
    y = 13,
    label = "Number of Josephs\nborn each year\nestimated to be alive.",
    colour = col_text,
    size = 4.5,
    family = my_font
  ) +
  annotate(
    "text",
    x = 1989,
    y = 37.3,
    label = "The median living\nJosephs is 37 years old.",
    colour = "grey35",
    size = 4.5,
    family = "Arial Narrow"
  ) +
  # Curved arrow running from the median-age note down to the bars.
  annotate(
    "curve",
    x = 1982,
    xend = 1974,
    y = 38,
    yend = 23,
    arrow = arrow(length = unit(0.3, "cm")),
    curvature = 0.5
  ) +
  # NOTE(review): this label uses the line's colour (grey20) but its text
  # repeats the alive-estimate wording; confirm the intended label text.
  annotate(
    "text",
    x = 1920,
    y = 30,
    label = "Number of Josephs born each year\nestimated to be alive on 01/01/2014.",
    colour = "grey20",
    size = 4.5,
    family = my_font
  ) +
  theme(
    plot.background = element_rect(fill = col_bgr, color = col_bgr),
    panel.background = element_rect(fill = col_bgr, color = col_bgr),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = col_grid),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    axis.text = element_text(family = my_font, size = 11, color = "grey40"),
    # The bottom margin leaves room for the footer strip drawn with grid.
    plot.margin = unit(c(1.2, 0.6, 1.2, 0.4), "cm"),
    plot.title = element_text(
      family = my_font, size = 18, hjust = 0, vjust = 7, color = "grey30"
    ),
    plot.subtitle = element_text(
      family = "Arial", size = 15, color = "grey30", vjust = 8
    ),
    plot.title.position = "plot"
  )
library(grid)
# Footer strip drawn on top of the current graphics device. Coordinates are in
# grid's default units. The rectangle is centred on the bottom-left corner, so
# only the part of it that lies inside the device is visible.
# local() keeps the helper values out of the global environment.
invisible(local({
  footer_gp <- gpar(
    fontsize = 10,
    col = "white",
    fontfamily = "Arial Narrow",
    fontface = "bold"
  )
  text_y <- 0.01*1.7

  # Dark grey band along the bottom edge.
  grid.rect(
    x = 0,
    y = 0,
    width = 10,
    height = 0.03*3,
    gp = gpar(fill = "grey40", col = "grey40")
  )

  # Left-aligned credit.
  grid.text(
    label = "FIVETHIRTYEIGHT",
    x = 0.01*3.8,
    y = text_y,
    just = c("left", "bottom"),
    gp = footer_gp
  )

  # Right-aligned data source.
  grid.text(
    label = "SOURCE: SOCIAL SECURITY ADMINISTRATION",
    x = 0.98,
    y = text_y,
    just = c("right", "bottom"),
    gp = footer_gp
  )
}))