knitr::opts_chunk$set(class.source = "foldable")

## installing packages
library(dplyr)
library(ggplot2)
library(tidyr)
library(tidyverse)
library(tidycensus)
knitr::opts_chunk$set(class.source = "foldable")

## natality data set 
file_path <- "/Users/aisli/OneDrive/Documents/classes/PH460/data/natality.txt"
natality <- read.table(file_path, header = TRUE, sep = "\t", stringsAsFactors = FALSE, nrows = 1513)

natality <- natality[, -c(1,3,5,6,8)] 
colnames(natality) <- c("census_region", "race", "age", "year", "births")
natality$census_region <- gsub("^Census Region [0-9]+: ", "", natality$census_region)

natality$year <- as.numeric(natality$year)
natality$births <- as.numeric(natality$births)

natality <- natality %>%
  filter(!age %in% c("15", "45-49", "50+")) 

natality <- natality %>%
  mutate(age = case_when(
    age %in% c("35-39", "40-44") ~ "35-44", TRUE ~ age))

natality <- natality %>%
  group_by(census_region, year, race, age) %>%
  summarise(births = sum(births), .groups = "drop")

natality <- natality %>%
  filter(year != 2023)
knitr::opts_chunk$set(class.source = "foldable")

## load population data
v17 <- load_variables(2017, "acs5", cache = TRUE)

## code for each age group for each year
pop_2016_15 <- get_acs(
  geography = "region", 
  variables = c(
    `White_15-17` = "B01001A_021",
    `Black or African American_15-17` = "B01001B_021",
    `American Indian or Alaska Native_15-17` = "B01001C_021",
    `Asian_15-17` = "B01001D_021", 
    `Native Hawaiian or Other Pacific Islander_15-17` = "B01001E_021",
    `More than one race_15-17` = "B01001G_021"
  ),
  year = 2016
) %>%
  mutate(Year = 2016)

pop_2017_15 <- get_acs(
  geography = "region", 
  variables = c(
    `White_15-17` = "B01001A_021",
    `Black or African American_15-17` = "B01001B_021",
    `American Indian or Alaska Native_15-17` = "B01001C_021",
    `Asian_15-17` = "B01001D_021", 
    `Native Hawaiian or Other Pacific Islander_15-17` = "B01001E_021",
    `More than one race_15-17` = "B01001G_021"
  ),
  year = 2017
) %>%
  mutate(Year = 2017)

pop_2018_15 <- get_acs(
  geography = "region", 
  variables = c(
    `White_15-17` = "B01001A_021",
    `Black or African American_15-17` = "B01001B_021",
    `American Indian or Alaska Native_15-17` = "B01001C_021",
    `Asian_15-17` = "B01001D_021", 
    `Native Hawaiian or Other Pacific Islander_15-17` = "B01001E_021",
    `More than one race_15-17` = "B01001G_021"
  ),
  year = 2018
) %>%
  mutate(Year = 2018)

pop_2019_15 <- get_acs(
  geography = "region", 
  variables = c(
    `White_15-17` = "B01001A_021",
    `Black or African American_15-17` = "B01001B_021",
    `American Indian or Alaska Native_15-17` = "B01001C_021",
    `Asian_15-17` = "B01001D_021", 
    `Native Hawaiian or Other Pacific Islander_15-17` = "B01001E_021",
    `More than one race_15-17` = "B01001G_021"
  ),
  year = 2019
) %>%
  mutate(Year = 2019)

pop_2020_15 <- get_acs(
  geography = "region", 
  variables = c(
    `White_15-17` = "B01001A_021",
    `Black or African American_15-17` = "B01001B_021",
    `American Indian or Alaska Native_15-17` = "B01001C_021",
    `Asian_15-17` = "B01001D_021", 
    `Native Hawaiian or Other Pacific Islander_15-17` = "B01001E_021",
    `More than one race_15-17` = "B01001G_021"
  ),
  year = 2020
) %>%
  mutate(Year = 2020)

pop_2021_15 <- get_acs(
  geography = "region", 
  variables = c(
    `White_15-17` = "B01001A_021",
    `Black or African American_15-17` = "B01001B_021",
    `American Indian or Alaska Native_15-17` = "B01001C_021",
    `Asian_15-17` = "B01001D_021", 
    `Native Hawaiian or Other Pacific Islander_15-17` = "B01001E_021",
    `More than one race_15-17` = "B01001G_021"
  ),
  year = 2021
) %>%
  mutate(Year = 2021)

pop_2022_15 <- get_acs(
  geography = "region", 
  variables = c(
    `White_15-17` = "B01001A_021",
    `Black or African American_15-17` = "B01001B_021",
    `American Indian or Alaska Native_15-17` = "B01001C_021",
    `Asian_15-17` = "B01001D_021", 
    `Native Hawaiian or Other Pacific Islander_15-17` = "B01001E_021",
    `More than one race_15-17` = "B01001G_021"
  ),
  year = 2022
) %>%
  mutate(Year = 2022)

## 18-19
pop_2016_18 <- get_acs(
  geography = "region", 
  variables = c(
    `White_18-19` = "B01001A_022",
    `Black or African American_18-19` = "B01001B_022",
    `American Indian or Alaska Native_18-19` = "B01001C_022",
    `Asian_18-19` = "B01001D_022", 
    `Native Hawaiian or Other Pacific Islander_18-19` = "B01001E_022",
    `More than one race_18-19` = "B01001G_022"
  ),
  year = 2016
) %>%
  mutate(Year = 2016)

pop_2017_18 <- get_acs(
  geography = "region", 
  variables = c(
    `White_18-19` = "B01001A_022",
    `Black or African American_18-19` = "B01001B_022",
    `American Indian or Alaska Native_18-19` = "B01001C_022",
    `Asian_18-19` = "B01001D_022", 
    `Native Hawaiian or Other Pacific Islander_18-19` = "B01001E_022",
    `More than one race_18-19` = "B01001G_022"
  ),
  year = 2017
) %>%
  mutate(Year = 2017)

pop_2018_18 <- get_acs(
  geography = "region", 
  variables = c(
    `White_18-19` = "B01001A_022",
    `Black or African American_18-19` = "B01001B_022",
    `American Indian or Alaska Native_18-19` = "B01001C_022",
    `Asian_18-19` = "B01001D_022", 
    `Native Hawaiian or Other Pacific Islander_18-19` = "B01001E_022",
    `More than one race_18-19` = "B01001G_022"
  ),
  year = 2018
) %>%
  mutate(Year = 2018)

pop_2019_18 <- get_acs(
  geography = "region", 
  variables = c(
    `White_18-19` = "B01001A_022",
    `Black or African American_18-19` = "B01001B_022",
    `American Indian or Alaska Native_18-19` = "B01001C_022",
    `Asian_18-19` = "B01001D_022", 
    `Native Hawaiian or Other Pacific Islander_18-19` = "B01001E_022",
    `More than one race_18-19` = "B01001G_022"
  ),
  year = 2019
) %>%
  mutate(Year = 2019)

pop_2020_18 <- get_acs(
  geography = "region", 
  variables = c(
    `White_18-19` = "B01001A_022",
    `Black or African American_18-19` = "B01001B_022",
    `American Indian or Alaska Native_18-19` = "B01001C_022",
    `Asian_18-19` = "B01001D_022", 
    `Native Hawaiian or Other Pacific Islander_18-19` = "B01001E_022",
    `More than one race_18-19` = "B01001G_022"
  ),
  year = 2020
) %>%
  mutate(Year = 2020)

pop_2021_18 <- get_acs(
  geography = "region", 
  variables = c(
    `White_18-19` = "B01001A_022",
    `Black or African American_18-19` = "B01001B_022",
    `American Indian or Alaska Native_18-19` = "B01001C_022",
    `Asian_18-19` = "B01001D_022", 
    `Native Hawaiian or Other Pacific Islander_18-19` = "B01001E_022",
    `More than one race_18-19` = "B01001G_022"
  ),
  year = 2021
) %>%
  mutate(Year = 2021)

pop_2022_18 <- get_acs(
  geography = "region", 
  variables = c(
    `White_18-19` = "B01001A_022",
    `Black or African American_18-19` = "B01001B_022",
    `American Indian or Alaska Native_18-19` = "B01001C_022",
    `Asian_18-19` = "B01001D_022", 
    `Native Hawaiian or Other Pacific Islander_18-19` = "B01001E_022",
    `More than one race_18-19` = "B01001G_022"
  ),
  year = 2022
) %>%
  mutate(Year = 2022)

## 20-24
pop_2016_20 <- get_acs(
  geography = "region", 
  variables = c(
    `White_20-24` = "B01001A_023",
    `Black or African American_20-24` = "B01001B_023",
    `American Indian or Alaska Native_20-24` = "B01001C_023",
    `Asian_20-24` = "B01001D_023", 
    `Native Hawaiian or Other Pacific Islander_20-24` = "B01001E_023",
    `More than one race_20-24` = "B01001G_023"
  ),
  year = 2016
) %>%
  mutate(Year = 2016)

pop_2017_20 <- get_acs(
  geography = "region", 
  variables = c(
    `White_20-24` = "B01001A_023",
    `Black or African American_20-24` = "B01001B_023",
    `American Indian or Alaska Native_20-24` = "B01001C_023",
    `Asian_20-24` = "B01001D_023", 
    `Native Hawaiian or Other Pacific Islander_20-24` = "B01001E_023",
    `More than one race_20-24` = "B01001G_023"
  ),
  year = 2017
) %>%
  mutate(Year = 2017)

pop_2018_20 <- get_acs(
  geography = "region", 
  variables = c(
    `White_20-24` = "B01001A_023",
    `Black or African American_20-24` = "B01001B_023",
    `American Indian or Alaska Native_20-24` = "B01001C_023",
    `Asian_20-24` = "B01001D_023", 
    `Native Hawaiian or Other Pacific Islander_20-24` = "B01001E_023",
    `More than one race_20-24` = "B01001G_023"
  ),
  year = 2018
) %>%
  mutate(Year = 2018)

pop_2019_20 <- get_acs(
  geography = "region", 
  variables = c(
    `White_20-24` = "B01001A_023",
    `Black or African American_20-24` = "B01001B_023",
    `American Indian or Alaska Native_20-24` = "B01001C_023",
    `Asian_20-24` = "B01001D_023", 
    `Native Hawaiian or Other Pacific Islander_20-24` = "B01001E_023",
    `More than one race_20-24` = "B01001G_023"
  ),
  year = 2019
) %>%
  mutate(Year = 2019)

pop_2020_20 <- get_acs(
  geography = "region", 
  variables = c(
    `White_20-24` = "B01001A_023",
    `Black or African American_20-24` = "B01001B_023",
    `American Indian or Alaska Native_20-24` = "B01001C_023",
    `Asian_20-24` = "B01001D_023", 
    `Native Hawaiian or Other Pacific Islander_20-24` = "B01001E_023",
    `More than one race_20-24` = "B01001G_023"
  ),
  year = 2020
) %>%
  mutate(Year = 2020)

pop_2021_20 <- get_acs(
  geography = "region", 
  variables = c(
    `White_20-24` = "B01001A_023",
    `Black or African American_20-24` = "B01001B_023",
    `American Indian or Alaska Native_20-24` = "B01001C_023",
    `Asian_20-24` = "B01001D_023", 
    `Native Hawaiian or Other Pacific Islander_20-24` = "B01001E_023",
    `More than one race_20-24` = "B01001G_023"
  ),
  year = 2021
) %>%
  mutate(Year = 2021)

pop_2022_20 <- get_acs(
  geography = "region", 
  variables = c(
    `White_20-24` = "B01001A_023",
    `Black or African American_20-24` = "B01001B_023",
    `American Indian or Alaska Native_20-24` = "B01001C_023",
    `Asian_20-24` = "B01001D_023", 
    `Native Hawaiian or Other Pacific Islander_20-24` = "B01001E_023",
    `More than one race_20-24` = "B01001G_023"
  ),
  year = 2022
) %>%
  mutate(Year = 2022)

## 25-29
pop_2016_25 <- get_acs(
  geography = "region", 
  variables = c(
    `White_25-29` = "B01001A_024",
    `Black or African American_25-29` = "B01001B_024",
    `American Indian or Alaska Native_25-29` = "B01001C_024",
    `Asian_25-29` = "B01001D_024", 
    `Native Hawaiian or Other Pacific Islander_25-29` = "B01001E_024",
    `More than one race_25-29` = "B01001G_024"
  ),
  year = 2016
) %>%
  mutate(Year = 2016)

pop_2017_25 <- get_acs(
  geography = "region", 
  variables = c(
    `White_25-29` = "B01001A_024",
    `Black or African American_25-29` = "B01001B_024",
    `American Indian or Alaska Native_25-29` = "B01001C_024",
    `Asian_25-29` = "B01001D_024", 
    `Native Hawaiian or Other Pacific Islander_25-29` = "B01001E_024",
    `More than one race_25-29` = "B01001G_024"
  ),
  year = 2017
) %>%
  mutate(Year = 2017)

pop_2018_25 <- get_acs(
  geography = "region", 
  variables = c(
    `White_25-29` = "B01001A_024",
    `Black or African American_25-29` = "B01001B_024",
    `American Indian or Alaska Native_25-29` = "B01001C_024",
    `Asian_25-29` = "B01001D_024", 
    `Native Hawaiian or Other Pacific Islander_25-29` = "B01001E_024",
    `More than one race_25-29` = "B01001G_024"
  ),
  year = 2018
) %>%
  mutate(Year = 2018)

pop_2019_25 <- get_acs(
  geography = "region", 
  variables = c(
    `White_25-29` = "B01001A_024",
    `Black or African American_25-29` = "B01001B_024",
    `American Indian or Alaska Native_25-29` = "B01001C_024",
    `Asian_25-29` = "B01001D_024", 
    `Native Hawaiian or Other Pacific Islander_25-29` = "B01001E_024",
    `More than one race_25-29` = "B01001G_024"
  ),
  year = 2019
) %>%
  mutate(Year = 2019)

pop_2020_25 <- get_acs(
  geography = "region", 
  variables = c(
    `White_25-29` = "B01001A_024",
    `Black or African American_25-29` = "B01001B_024",
    `American Indian or Alaska Native_25-29` = "B01001C_024",
    `Asian_25-29` = "B01001D_024", 
    `Native Hawaiian or Other Pacific Islander_25-29` = "B01001E_024",
    `More than one race_25-29` = "B01001G_024"
  ),
  year = 2020
) %>%
  mutate(Year = 2020)

pop_2021_25 <- get_acs(
  geography = "region", 
  variables = c(
    `White_25-29` = "B01001A_024",
    `Black or African American_25-29` = "B01001B_024",
    `American Indian or Alaska Native_25-29` = "B01001C_024",
    `Asian_25-29` = "B01001D_024", 
    `Native Hawaiian or Other Pacific Islander_25-29` = "B01001E_024",
    `More than one race_25-29` = "B01001G_024"
  ),
  year = 2021
) %>%
  mutate(Year = 2021)

pop_2022_25 <- get_acs(
  geography = "region", 
  variables = c(
    `White_25-29` = "B01001A_024",
    `Black or African American_25-29` = "B01001B_024",
    `American Indian or Alaska Native_25-29` = "B01001C_024",
    `Asian_25-29` = "B01001D_024", 
    `Native Hawaiian or Other Pacific Islander_25-29` = "B01001E_024",
    `More than one race_25-29` = "B01001G_024"
  ),
  year = 2022
) %>%
  mutate(Year = 2022)

## 30-34
pop_2016_30 <- get_acs(
  geography= "region", variables = c( 
  `White_30-34` = "B01001A_025",
  `Black or African American_30-34` = "B01001B_025",
  `American Indian or Alaska Native_30-34` = "B01001C_025",
  `Asian_30-35` = "B01001D_025",
  `Native Hawaiian or Other Pacific Islander_30-34` = "B01001E_025",
  `More than one race_30-34` = "B01001G_025"), 
  year = 2016) %>%
  mutate(Year= 2016)

pop_2017_30 <- get_acs(
  geography= "region", variables = c( 
  `White_30-34` = "B01001A_025",
  `Black or African American_30-34` = "B01001B_025",
  `American Indian or Alaska Native_30-35` = "B01001C_025",
  `Asian_30-34` = "B01001D_025",
  `Native Hawaiian or Other Pacific Islander_30-34` = "B01001E_025",
  `More than one race_30-34` = "B01001G_025"), 
  year = 2017) %>%
  mutate(Year= 2017)

pop_2018_30 <- get_acs(
  geography= "region", variables = c( 
  `White_30-34` = "B01001A_025",
  `Black or African American_30-34` = "B01001B_025",
  `American Indian or Alaska Native_30-34` = "B01001C_025",
  `Asian_30-34` = "B01001D_025",
  `Native Hawaiian or Other Pacific Islander_30-34` = "B01001E_025",
  `More than one race_30-34` = "B01001G_025"), 
  year = 2018) %>%
  mutate(Year= 2018)

pop_2019_30 <- get_acs(
  geography= "region", variables = c( 
  `White_30-34` = "B01001A_025",
  `Black or African American_30-34` = "B01001B_025",
  `American Indian or Alaska Native_30-34` = "B01001C_025",
  `Asian_30-34` = "B01001D_025",
  `Native Hawaiian or Other Pacific Islander_30-34` = "B01001E_025",
  `More than one race_30-34` = "B01001G_025"), 
  year = 2019) %>%
  mutate(Year= 2019)

pop_2020_30 <- get_acs(
  geography= "region", variables = c( 
  `White_30-34` = "B01001A_025",
  `Black or African American_30-34` = "B01001B_025",
  `American Indian or Alaska Native_30-34` = "B01001C_025",
  `Asian_30-34` = "B01001D_025",
  `Native Hawaiian or Other Pacific Islander_30-34` = "B01001E_025",
  `More than one race_30-34` = "B01001G_025"), 
  year = 2020) %>%
  mutate(Year= 2020)

pop_2021_30 <- get_acs(
  geography= "region", variables = c( 
  `White_30-34` = "B01001A_025",
  `Black or African American_30-34` = "B01001B_025",
  `American Indian or Alaska Native_30-34` = "B01001C_025",
  `Asian_30-34` = "B01001D_025",
  `Native Hawaiian or Other Pacific Islander_30-34` = "B01001E_025",
  `More than one race_30-34` = "B01001G_025"), 
  year = 2021) %>%
  mutate(Year= 2021)

pop_2022_30 <- get_acs(
  geography= "region", variables = c( 
  `White_30-34` = "B01001A_025",
  `Black or African American_30-34` = "B01001B_025",
  `American Indian or Alaska Native_30-34` = "B01001C_025",
  `Asian_30-34` = "B01001D_025",
  `Native Hawaiian or Other Pacific Islander_30-34` = "B01001E_025",
  `More than one race_30-34` = "B01001G_025"), 
  year = 2022) %>%
  mutate(Year= 2022)

## 35-44
pop_2016_35 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_026",
    `Black or African American_35-44` = "B01001B_026",
    `American Indian or Alaska Native_35-44` = "B01001C_026",
    `Asian_35-44` = "B01001D_026", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_026",
    `More than one race_35-44` = "B01001G_026"
  ),
  year = 2016) %>%
  mutate(Year = 2016)

pop_2017_35 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_026",
    `Black or African American_35-44` = "B01001B_026",
    `American Indian or Alaska Native_35-44` = "B01001C_026",
    `Asian_35-44` = "B01001D_026", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_026",
    `More than one race_35-44` = "B01001G_026"
  ),
  year = 2017) %>%
  mutate(Year = 2017)

pop_2018_35 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_026",
    `Black or African American_35-44` = "B01001B_026",
    `American Indian or Alaska Native_35-44` = "B01001C_026",
    `Asian_35-44` = "B01001D_026", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_026",
    `More than one race_35-44` = "B01001G_026"
  ),
  year = 2018) %>%
  mutate(Year = 2018)

pop_2019_35 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_026",
    `Black or African American_35-44` = "B01001B_026",
    `American Indian or Alaska Native_35-44` = "B01001C_026",
    `Asian_35-44` = "B01001D_026", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_026",
    `More than one race_35-44` = "B01001G_026"
  ),
  year = 2019) %>%
  mutate(Year = 2019)

pop_2020_35 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_026",
    `Black or African American_35-44` = "B01001B_026",
    `American Indian or Alaska Native_35-44` = "B01001C_026",
    `Asian_35-44` = "B01001D_026", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_026",
    `More than one race_35-44` = "B01001G_026"
  ),
  year = 2020) %>%
  mutate(Year = 2020)

pop_2021_35 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_026",
    `Black or African American_35-44` = "B01001B_026",
    `American Indian or Alaska Native_35-44` = "B01001C_026",
    `Asian_35-44` = "B01001D_026", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_026",
    `More than one race_35-44` = "B01001G_026"
  ),
  year = 2021) %>%
  mutate(Year = 2021)

pop_2022_35 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_026",
    `Black or African American_35-44` = "B01001B_026",
    `American Indian or Alaska Native_35-44` = "B01001C_026",
    `Asian_35-44` = "B01001D_026", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_026",
    `More than one race_35-44` = "B01001G_026"
  ),
  year = 2022) %>%
  mutate(Year = 2022)

## 45-54
pop_2016_45 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_027",
    `Black or African American_35-44` = "B01001B_027",
    `American Indian or Alaska Native_35-44` = "B01001C_027",
    `Asian_35-44` = "B01001D_027", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_027",
    `More than one race_35-44` = "B01001G_027"
  ),
  year = 2016) %>%
  mutate(Year = 2016)

pop_2017_45 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_027",
    `Black or African American_35-44` = "B01001B_027",
    `American Indian or Alaska Native_35-44` = "B01001C_027",
    `Asian_35-44` = "B01001D_027", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_027",
    `More than one race_35-44` = "B01001G_027"
  ),
  year = 2017) %>%
  mutate(Year = 2017)

pop_2018_45 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_027",
    `Black or African American_35-44` = "B01001B_027",
    `American Indian or Alaska Native_35-44` = "B01001C_027",
    `Asian_35-44` = "B01001D_027", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_027",
    `More than one race_35-44` = "B01001G_027"
  ),
  year = 2018) %>%
  mutate(Year = 2018)

pop_2019_45 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_027",
    `Black or African American_35-44` = "B01001B_027",
    `American Indian or Alaska Native_35-44` = "B01001C_027",
    `Asian_35-44` = "B01001D_027", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_027",
    `More than one race_35-44` = "B01001G_027"
  ),
  year = 2019) %>%
  mutate(Year = 2019)

pop_2020_45 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_027",
    `Black or African American_35-44` = "B01001B_027",
    `American Indian or Alaska Native_35-44` = "B01001C_027",
    `Asian_35-44` = "B01001D_027", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_027",
    `More than one race_35-44` = "B01001G_027"
  ),
  year = 2020) %>%
  mutate(Year = 2020)

pop_2021_45 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_027",
    `Black or African American_35-44` = "B01001B_027",
    `American Indian or Alaska Native_35-44` = "B01001C_027",
    `Asian_35-44` = "B01001D_027", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_027",
    `More than one race_35-44` = "B01001G_027"
  ),
  year = 2021) %>%
  mutate(Year = 2021)

pop_2022_45 <- get_acs(
  geography = "region",
  variables = c(
    `White_35-44` = "B01001A_027",
    `Black or African American_35-44` = "B01001B_027",
    `American Indian or Alaska Native_35-44` = "B01001C_027",
    `Asian_35-44` = "B01001D_027", 
    `Native Hawaiian or Other Pacific Islander_35-44` = "B01001E_027",
    `More than one race_35-44` = "B01001G_027"
  ),
  year = 2022) %>%
  mutate(Year = 2022)

## binding all years for a single age for each age group
pop_15 <- bind_rows(pop_2016_15, pop_2017_15, pop_2018_15, pop_2019_15, pop_2020_15, pop_2021_15, pop_2022_15)

pop_18 <- bind_rows(pop_2016_18, pop_2017_18, pop_2018_18, pop_2019_18, pop_2020_18, pop_2021_18, pop_2022_18)

pop_20 <- bind_rows(pop_2016_20, pop_2017_20, pop_2018_20, pop_2019_20, pop_2020_20, pop_2021_20, pop_2022_20)

pop_25 <- bind_rows(pop_2016_25, pop_2017_25, pop_2018_25, pop_2019_25, pop_2020_25, pop_2021_25, pop_2022_25)

pop_30 <- bind_rows(pop_2016_30, pop_2017_30, pop_2018_30, pop_2019_30, pop_2020_30, pop_2021_30, pop_2022_30)

pop_35 <- bind_rows(pop_2016_35, pop_2017_35, pop_2018_35, pop_2019_35, pop_2020_35, pop_2021_35, pop_2022_35)

pop_45 <- bind_rows(pop_2016_45, pop_2017_45, pop_2018_45, pop_2019_45, pop_2020_45, pop_2021_45, pop_2022_45)

## binding all of the age groups
populationdata_all <- bind_rows(pop_15, pop_18, pop_20, pop_25, pop_30, pop_35, pop_45)

## creating new columns to split race and age group
populationdata_all_new <- populationdata_all %>%
  separate_wider_delim(variable, delim = "_", names = c("race", "age"))

## creating age group 15-19
populationdata_all_new <- populationdata_all_new %>%
  mutate(age = case_when(age %in% c("15-17", "18-19") ~ "15-19", 
                         TRUE ~ age))

## removing 30-35
populationdata_all_new <- populationdata_all_new %>%
  filter(age != "30-35")

## changing "NAME" to "census_region" to match natality dataset
populationdata_all_new$NAME <- gsub("Region", "", populationdata_all_new$NAME)
colnames(populationdata_all_new)[colnames(populationdata_all_new) == "NAME"] <- "census_region"

# remove whitespace from census_region
populationdata_all_new$census_region <- trimws(populationdata_all_new$census_region)

populationdata_all_new <- populationdata_all_new[, -c(1)]
populationdata_all_new <- populationdata_all_new %>%
  rename(year = Year)

populationdata_all_new <- populationdata_all_new %>%
  group_by(census_region, year, race, age) %>%
  summarise(estimate = sum(estimate), .groups = "drop")
knitr::opts_chunk$set(class.source = "foldable")

# full join data
birthratedat <- full_join(populationdata_all_new, natality)

# calculate birthrate
birthratedat <- birthratedat %>%
  mutate(birth_rate = (as.numeric(births) / as.numeric(estimate)) * 1000)

# no nas
birthratedat <- na.omit(birthratedat)

Natality trends in the U.S. show that birth rates have decreased from 2016 to 2022 and that women are delaying motherhood, seen by increases in rates for 30-34 and 35-44 age groups. This time period is also interesting to look at because it shows the effects of COVID-19. The analysis on birth rates by region and race provided interesting results that sparked interest in teen birth rates. These rates decreased significantly over the time period somewhat consistently and also more than other age groups. This could be an indicator of improvements to sexual education, access to contraception, and poverty levels.

The objective of this analysis is to identify teen birth patterns and differences among factors like race and region. This can point to concerns for access to sexual education and signify the need for regional policies. Thirty states require sexual education by law, but seventeen of these only discuss abstinence 1. Only three states, California, Washington, and Oregon, require comprehensive sexual education. Limited access to contraception and insurance coverage further increase gaps 2. Increasing access to these, among other things, could contribute to lower rates. For the time period of 2016 to 2022 specifically, lack of socialization during the pandemic could also be a factor since teens were isolated for a period.

In order to identify patterns across time and what different factors affect this, the following line graph first investigates by race. The figure depicts the trend overtime of teen birth rates for each race. The data comes from the CDC Wonder Births Database and the U.S. Census Bureau, 2017 5-Year ACS data, which was used in the same manner in the natality report, querying by Census region, year, age and race.

knitr::opts_chunk$set(class.source = "foldable")

## teen birthrates alone 
teenrates <- birthratedat %>%
  filter(age == "15-19")

teenrates_race <- teenrates %>%
  group_by(year, race) %>%
  summarise(birth_rate = sum(birth_rate))

ggplot(teenrates_race, aes(x = year, y = birth_rate, group = race, color = race)) + 
  geom_line(size = 0.8) + 
  geom_point(size = 1.5, alpha = 0.8) + 
  labs(title = "Teen Birth Rates Over Time (2016-2022) by Race", 
       x = "Year", 
       y = "Summed Birth Rate",
       color = "Race") + 
  theme_bw() +
  scale_color_brewer(palette = "Dark2")

The x-axis is years to see this trend, and the y-axis is the summed birth rate. A point on the graph is the sum of birth rates across all four Census regions for a given race in a particular year, and each race is depicted by a color. Overall, teen birth rates have decreased over the period, but there is not a significant drop in 2020 as might have been expected. This is only present for the “More than one race” group with a large dip from 2019 to 2020; this could also possibly have something to do with the 2020 Census and if there were differences in racial groups during that survey.

The race that differs from all of the others is the Native Hawaiian or Other Pacific Islander(NHPI) group; it is the only one that has increased. Note, the population and birth numbers for this race are much lower, so the birth rate seems higher. This calculation is births per 1,000 people, so it is a proportion relative to the population size, which is much smaller here. This means the birth rate is high even though the birth counts are relatively small. This highlights the importance of using rates instead of counts for comparison.

Other factors can also be considered here. The NHPI population is quite small in regions other than the West. In regions where this race is prominent, they are being pushed out. Having more children is a way to sustain their culture and ensure the strength of their identity during ongoing marginalization. The NHPI community also has a larger proportion of younger individuals as a result of this 3, meaning there are more people of childbearing age, possibly correlating with this higher birth rate. Differences in access to healthcare, contraception, and education could also be contributing to this.

To investigate this more, the stacked bar chart below dives into education levels among the NHPI population. This is the same data, adding education level as an additional query. The x-axis is years to see changes throughout the time period, and the y-axis is birth counts since rates are not needed for comparison. The education levels have been simplified to focus on four; all other education levels have zero counts, with the exception of “Associate’s Degree (AA), (AS)” and “Unknown”, which both had a very small number of suppressed values and have been excluded for analysis purposes.

knitr::opts_chunk$set(class.source = "foldable")

## native hawaiian teen births with education
file_path_nhe <- "/Users/aisli/OneDrive/Documents/classes/PH460/data/natality_native_ed.txt"
native_ed <- read.table(file_path_nhe, header = TRUE, sep = "\t", stringsAsFactors = FALSE, nrows = 352)

native_ed <- native_ed[, -c(1,2,3,5,7,9,11)] 
colnames(native_ed) <- c("race", "age", "year", "education", "births")

native_ed$year <- as.numeric(native_ed$year)
native_ed$births <- as.numeric(native_ed$births)

native_ed <- native_ed %>%
  filter(year != 2023)
## <8th grace, 11/28 surpressed
## hs with no diploma, no surpressed
## hs grad, 1/28 surpressed 
## some college, 14/28 surpressed
## associates, 10 surpressed, else 0
## bachelor's degree, all 0
## master's, all 0
## doctorate, all 0
## unknown is either 0 or surpress, but has all 7 values from west
## excluded 0 for all 28
## not reported, all 0

native_ed_figure <- native_ed %>%
  filter(!education %in% c(
    "Associate degree (AA, AS)", 
    "Bachelor's degree (BA, AB, BS)", 
    "Master's degree (MA, MS, MEng, MEd, MSW, MBA)", 
    "Doctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD)", 
    "Unknown or Not Stated", 
    "Excluded", 
    "Not Reported"
  ))

native_ed_figure$births[is.na(native_ed_figure$births)] <- 0

native_ed_figure_com <- native_ed_figure %>%
  group_by(year, education) %>%
  summarise(births = sum(births))

ggplot(native_ed_figure_com, aes(x = as.factor(year), y = births, fill = education)) + 
  geom_col() + 
  geom_text(aes(label = births), position = position_stack(vjust = 0.5), size = 3, color = "white", fontface = "bold") + 
  labs(
    title = "NHPI Teen Births Over Time (2016-2022) by Education Level", 
    x = "Year", 
    y = "Birth Count", 
    fill = "Education Level"
  ) + 
  theme_bw() +
  scale_color_brewer(palette = "Dark2")

Considering totals, they are fairly consistent. There is a drop in birth counts in 2020, possibly due to the pandemic, but this count nearly recovers by 2022. Looking at the overall changes, the numbers of NHPI teen mothers with some or completed high school education have not shifted much. However, there are clear changes in the “8th grade or less” and “Some college credit, but not a degree” categories. The “Some college credit, but not a degree” group has dropped to half its value in 2022 compared to 2016, meaning less NHPI teen mothers have attended college. Also, the “8th grade of less” group has more than doubled overtime, meaning that there is an increase in those that have not attended high school.

Investigating education here can indicate access to sexual education in schools. Although sexual education is often taught in middle school, a large portion of students do not receive this information until high school, and only 45% of students nationally receive this education before becoming sexually active 4. So if a larger number of NHPI teens are not receiving education past 8th grade, it could indicate this increase, in addition to the cultural motivations discussed. This also raises the discussion of teen pregnancy limiting the ability to continue education afterward; having a child at this age limits access to choice, further education, and income security.

The following line graph also shows teen birth rates from 2016 to 2022, this time focusing on Census region. A point on the graph is the sum of birth rates for all six races for a given Census region in a particular year, and regions are depicted by color.

knitr::opts_chunk$set(class.source = "foldable")

## teen birthrates by region 
teenrates_region <- teenrates %>%
  group_by(year, census_region) %>%
  summarise(birth_rate = sum(birth_rate))

ggplot(teenrates_region, aes(x = year, y = birth_rate, group = census_region, color = census_region)) + 
  geom_line(size = 0.8) + 
  geom_point(size = 1.5, alpha = 0.8) +
  labs(title = "Teen Birth Rates Over Time (2016-2022) by Region", 
       x = "Year", 
       y = "Summed Birth Rate",
       color = "Census Region") + 
  theme_bw() +
  scale_color_brewer(palette = "Dark2")

This figure shows the overall decreased teen birth rate from 2016 to 2022. Looking at the effects of the pandemic, there is a significant drop from 2019 to 2020 in both the South and West, but oppositely, there is an increase in the Midwest and Northeast. The Northeast was also the only region that did not see an overall decrease across the time period; the value for the Northeast in 2022 was actually higher than the value for the West in 2016, which only decreased. Part of this can be explained by changes in population or be explained by socioeconomic disparities in mostly metropolitan counties in the Northeast, which includes education 5.

Investigating education in the Northeast in a similar way to the NHPI population, the following stacked bar chart is divided by education level in the Northeast. The education levels have similarly been simplified to focus on five since all other education levels have zero counts or suppressed values that are not relevant. Note, the “Associate’s Degree (AA), (AS)” category has more values in the Northeast and is now included.

knitr::opts_chunk$set(class.source = "foldable")

## northeast teen birthrates with education
file_path_nre <- "/Users/aisli/OneDrive/Documents/classes/PH460/data/natality_north_ed_new.txt"
north_ed <- read.table(file_path_nre, header = TRUE, sep = "\t", stringsAsFactors = FALSE, nrows = 792)

north_ed <- north_ed[, -c(1,3,5,7,9,11)] 
colnames(north_ed) <- c("census_region", "race", "age", "year", "education", "births")

north_ed$year <- as.numeric(north_ed$year)
north_ed$births <- as.numeric(north_ed$births)

north_ed <- north_ed %>%
  filter(year != 2023)

north_ed_figure <- north_ed %>%
  filter(!education %in% c( 
    "Bachelor's degree (BA, AB, BS)", 
    "Master's degree (MA, MS, MEng, MEd, MSW, MBA)", 
    "Doctorate (PhD, EdD) or Professional Degree (MD, DDS, DVM, LLB, JD)",
    "Unknown or Not Stated",
    "Excluded", 
    "Not Reported"
  ))

north_ed_figure$births[is.na(north_ed_figure$births)] <- 0

north_ed_figure_com <- north_ed_figure %>%
  group_by(year, education) %>%
  summarise(births = sum(births)) %>%
  mutate(percentage = births / sum(births) * 100)

ggplot(north_ed_figure_com, aes(x = as.factor(year), y = births, fill = education)) + 
  geom_col() + 
  geom_text(aes(label = paste0(round(percentage, 1), "%")), position = position_stack(vjust = 0.5), size = 3, color = "white", fontface = "bold") + 
  labs(
    title = "Northeast Teen Births Over Time (2016-2022) by Education Level", 
    x = "Year", 
    y = "Births", 
    fill = "Education Level"
  ) + 
  theme_bw() + 
  scale_color_brewer(palette = "Dark2")

The total birth counts have decreased overtime, but the birth rate has increased. Based on the birth rate calculation, this means that the population in the region has also changed overtime to change this denominator and affect the birth rate in this way. Here, the labels for each education level are expressed as percentages of the bar for the given year instead of counts; since the count is decreasing this aids in comparison across time. The “8th grade or less” category has increased overtime, possibly indicating that less students are receiving sexual education since a smaller portion are attending high school. Also, the portion of teen mothers in the Northeast with some high school has decreased, while the portion of those with high school completed has increased, offsetting this. Looking at college education, both the “Some college credit, but not a degree” and “Associate’s Degree (AA), (AS)” (with percentages between the gold and blue) categories have decreased, raising the same discussion about continued education after teen pregnancy from the NHPI figure.

The Northeast region is often perceived as the most educated region, reflected in its universities and income levels. This belief is also true below the college level, and there are more states in the Northeast with sexual education requirements, possibly accounting for these decreased birth counts. This region is also experiencing the most rapid aging population in the country 6, meaning there are less childbearing age people to have children or, similarly, that have teenage aged children.

Overall, analyzing teen birth rates shows that they have decreased overall from 2016 to 2022. The outliers with increased rates, the NHPI population and Northeast Census region, are possibly linked to population counts and education levels. Sexual education in schools is the largest factor to lower these rates and give teens access to more choice in the future. This analysis builds on the ideas in the natality report that birth trends raise concerns for reproductive care and women’s health, and highlights the need for increased sexual education and contraception and more regional policies to aid in this.


  1. Detailed Insights on U.S. Sex Education Policies, SIECUS, Sex Ed For Social Change, 2022, https://siecus.org/siecus-state-profiles/↩︎

  2. State Options Addressing Access to Contraception, National Conference of State Legislatures, 2023, https://documents.ncsl.org/wwwncsl/Health/Access-to-Contraception-v02.pdf↩︎

  3. Demographic, Social, Economic, and Housing Characteristics for Selected Race Groups in Hawaii, Hawaii Department of Business, Economic Development and Tourism, 2018, https://files.hawaii.gov/dbedt/economic/reports/SelectedRacesCharacteristics_HawaiiReport.pdf↩︎

  4. What’s the State of Sex Education in the U.S.?, Planned Parenthood, https://www.plannedparenthood.org/learn/for-educators/whats-state-sex-education-us↩︎

  5. Spatially varying predictors of teenage birth rates among counties in the United States, National Institute of Health, 2012, https://pmc.ncbi.nlm.nih.gov/articles/PMC3493119/↩︎

  6. 2019 U.S. Population Estimates Continue to Show the Nation’s Growth Is Slowing, U.S. Census Bureau, 2019, https://www.census.gov/newsroom/press-releases/2019/popest-nation.html↩︎