This post is inspired by the article: https://fivethirtyeight.com/features/how-to-tell-someones-age-when-all-you-know-is-her-name/
library(tufte)
library(mdsr)
library(tidyverse)
library(ggthemes)
library(extrafont)
# Register the fonts imported by extrafont with the Windows graphics device so
# that family = "Garamond" can be resolved by the plots in this file.
# loadfonts(device = "win") depends on grDevices::windowsFonts(), which is only
# available on Windows, so the call is skipped on other platforms instead of
# aborting the whole script.
if (.Platform$OS.type == "windows") {
  loadfonts(device = "win")
}

# Baby-name table returned by make_babynames_dist(); the code in this file
# reads its name, sex, year, count_thousands and alive_prob columns.
baby_names <- make_babynames_dist()

# Subset used for the Joseph chart: male rows with the name "Joseph".
joseph <- baby_names %>%
  filter(name == "Joseph", sex == "M")
# Age-distribution chart for boys named Joseph.
#
# The text labels and the arrow are added with annotate() rather than
# geom_text()/geom_curve(): those geoms inherit the plot data, so with constant
# x/y/label values they would draw one copy per row of `joseph`, stacking
# identical labels on top of each other.
joseph %>%
  ggplot() +
  # Light bars: count_thousands * alive_prob for every birth year.
  geom_col(
    aes(year, count_thousands * alive_prob),
    fill = "#b2d7e9", colour = "white"
  ) +
  # Dark bar: the same quantity, drawn again for the highlighted year 1975.
  geom_col(
    aes(year, count_thousands * alive_prob),
    data = joseph %>% filter(year == 1975),
    fill = "#008fd5", colour = "white"
  ) +
  # Line: count_thousands per birth year.
  # NOTE(review): recent ggplot2 releases prefer `linewidth` over `size` for
  # lines; `size` is kept so the script still works with older versions.
  geom_line(aes(year, count_thousands), size = 1.5) +
  annotate(
    "text",
    x = 1927, y = 13,
    label = "Number of Josephs\nborn each year\nestimated to be alive.",
    colour = "#008fd5", size = 4, family = "Garamond"
  ) +
  annotate(
    "text",
    x = 1991.4, y = 37.3,
    label = "The median living\nJosephs is 37 years old.",
    colour = "grey40", size = 5, family = "Garamond"
  ) +
  # Arrow from the median label towards the highlighted bar.
  annotate(
    "curve",
    x = 1982, xend = 1974, y = 38, yend = 23,
    arrow = arrow(length = unit(0.3, "cm")), curvature = 0.5
  ) +
  ylim(0, 42) +
  theme_fivethirtyeight() +
  # All font/colour overrides in one theme() call; they touch distinct
  # elements, so the result matches adding them one by one.
  theme(
    text = element_text(family = "Garamond", size = 13, color = "gray30"),
    plot.caption = element_text(
      size = 13, color = "grey40", family = "Garamond", face = "bold"
    ),
    axis.text.y = element_text(color = "grey30", size = 13),
    axis.text.x = element_text(color = "grey30", size = 13),
    plot.subtitle = element_text(color = "gray30", size = 15, family = "Garamond")
  ) +
  labs(
    title = "Age Distribution of American Boys Named Joseph",
    subtitle = "By year of birth",
    caption = "Data Source: Social Security Administration (SSA)."
  )
Write a function in which a red dot marks the birth year with the highest number of babies given the name, while the darker bar marks the birth year with the most people of that name estimated to be still alive:
# Plot the age distribution for one name/sex combination.
#
# Arguments:
#   name_baby  Name to plot; compared with the `name` column.
#   sex_baby   Sex to plot; compared with the `sex` column.
#   data       Table to read from. Defaults to the file-level `baby_names`,
#              so existing calls my_plot(name, sex) behave as before. It must
#              provide the columns name, sex, year, count_thousands and
#              alive_prob.
#
# Returns a ggplot object with:
#   - light bars: count_thousands * alive_prob * 1000 per birth year,
#   - one dark bar: the year where count_thousands * alive_prob is largest,
#   - a grey line: count_thousands * 1000 per birth year,
#   - a red point: the year where count_thousands is largest.
#
# A warning is raised when no row matches, because the returned plot would
# then contain no data (e.g. after a typo in the name).
my_plot <- function(name_baby, sex_baby, data = baby_names) {
  name_rows <- data %>%
    filter(name == name_baby, sex == sex_baby)

  if (nrow(name_rows) == 0) {
    warning(
      "No rows found for name '", name_baby, "' and sex '", sex_baby,
      "'; the plot will contain no data.",
      call. = FALSE
    )
  }

  # Single-row tables for the two highlighted years.
  peak_alive <- name_rows %>%
    slice(which.max(count_thousands * alive_prob))
  peak_born <- name_rows %>%
    slice(which.max(count_thousands))

  name_rows %>%
    ggplot() +
    geom_col(
      aes(year, count_thousands * alive_prob * 1000),
      fill = "#b2d7e9", colour = "white"
    ) +
    geom_col(
      aes(year, count_thousands * alive_prob * 1000),
      data = peak_alive,
      fill = "#008fd5", colour = "white"
    ) +
    geom_line(
      aes(year, count_thousands * 1000),
      size = 1.5, color = "grey50"
    ) +
    geom_point(
      aes(year, count_thousands * 1000),
      data = peak_born, color = "red", size = 4
    ) +
    labs(x = NULL, y = NULL) +
    theme_fivethirtyeight()
}
# Example charts. Each call sets its own y-axis range, chosen by hand for
# that name.

# Jenny (female), y-axis from 0 to 3000.
my_plot("Jenny", "F") +
  lims(y = c(0, 3000))

# Tony (male), y-axis from 0 to 10000.
my_plot("Tony", "M") +
  lims(y = c(0, 10000))

# Emma (female), y-axis from 0 to 60000.
my_plot("Emma", "F") +
  lims(y = c(0, 60000))
Thanks, Mr. Dung, for your references.