There has been a significant decline in the birth rate in the US. Some attribute the decline to financial insecurity, more people pursuing higher education, delayed marriage, the cost of living, and many other factors. This is concerning because a declining birth rate could shrink the future workforce as the current workforce ages.
The Data: US Trends on Teen Births. The data set covers all 50 states as well as the national birth rate, with yearly birth rates collected from 1990 to 2016 by the Census.
Since the overall birth rate has decreased, this data will show whether the teen birth rate follows a similar pattern.
Data
# Read the teen-birth trends CSV into `us` with readr.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# on a machine where this exact file exists.
library(readr)
us <- read_csv(
  file = "C:/Users/Kim/Desktop/Stuff/Random Data Sets/nchs-u.s.-and-state-trends-on-teen-births.csv"
)
## Parsed with column specification:
## cols(
## Year = col_double(),
## State = col_character(),
## `Age Group (Years)` = col_character(),
## `State Rate` = col_double(),
## `State Births` = col_double(),
## `U.S. Births` = col_double(),
## `U.S. Birth Rate` = col_double(),
## Unit = col_character()
## )
# Convert the tibble returned by read_csv() to a base data.frame.
# data.frame() makes the headers syntactic (e.g. "State Rate" becomes
# `State.Rate`), which is the form the rest of the script relies on.
us <- data.frame(us)
# Rename the age-group column by name rather than by position, so a change in
# column order cannot silently rename the wrong column.
names(us)[names(us) == "Age.Group..Years."] <- "Age.Group"
# Fail loudly if the expected column was not found.
stopifnot("Age.Group" %in% names(us))
# Preview the first rows.
head(us)
How long does the data span in years? What age groups are included?
# Row counts per year (span of the series) and per age group.
table(us[["Year"]])
table(us[["Age.Group"]])
##
## 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004
## 156 156 156 156 156 156 156 156 156 156 156 156 156 156 156
## 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
## 156 156 156 156 156 156 156 156 156 156 156 156
##
## 15-17 years 15-19 years 18-19 years
## 1404 1404 1404
library(DataExplorer)
# Histograms of the columns in `us`, for a quick look at their distributions.
plot_histogram(data = us)
# Correlation heatmap: are state rates correlated with national rates?
plot_correlation(data = us)
## 1 features with more than 20 categories ignored!
## State: 52 categories
## Warning in cor(x = structure(list(Year = c(1990, 1990, 1990, 1990, 1990, :
## the standard deviation is zero
## Warning: Removed 16 rows containing missing values (geom_text).
Top States with Highest Birth Rates
# Take the Average Birth Rate from 1990 to 2016
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------ tidyverse 1.2.1 --
## v ggplot2 3.2.0 v purrr 0.3.2
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 0.8.3 v stringr 1.4.0
## v ggplot2 3.2.0 v forcats 0.4.0
## -- Conflicts --------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Top states graph: the five states with the highest average teen birth rate
# over 1990 - 2016 (Mississippi, New Mexico, Texas, Arkansas, Arizona in the
# original run).
# NOTE(review): mean(State.Rate) pools the 15-17, 18-19 and 15-19 age-group
# rows; the 15-19 group overlaps the other two, so confirm that this pooled
# average is the intended measure.
us %>%
  # "Total U.S." is the national aggregate, not a state; keep it out of the
  # state ranking.
  filter(State != "Total U.S.") %>%
  group_by(State) %>%
  summarise(avg_rate = mean(State.Rate)) %>%
  # Name the ranking column explicitly instead of letting top_n() pick the
  # last column.
  top_n(5, wt = avg_rate) %>%
  # reorder() sorts the bars from highest to lowest rate; a plain character
  # x axis would be drawn alphabetically regardless of row order.
  ggplot(mapping = aes(
    x = reorder(State, -avg_rate),
    y = avg_rate,
    fill = State
  )) +
  geom_bar(stat = "identity") +
  # Print the rounded average just above each bar.
  geom_text(aes(label = round(avg_rate, digits = 1)),
    position = position_dodge(width = 0.5), vjust = -0.2
  ) +
  labs(
    x = "State", y = "Birth Rate (per 1,000)",
    title = "5 States with Highest Average Teen Birth Rates\nFrom 1990 - 2016"
  ) +
  # The fill colour duplicates the x axis, so the legend is hidden.
  theme(
    legend.position = "none",
    axis.title = element_text(face = "bold")
  ) +
  scale_y_continuous(limits = c(0, 75))
## Selecting by ave.Rate
# Lowest states: the five states with the lowest average teen birth rate over
# 1990 - 2016 (New Hampshire, Vermont, Massachusetts, Connecticut, Minnesota
# in the original run).
# NOTE(review): mean(State.Rate) pools the 15-17, 18-19 and 15-19 age-group
# rows; the 15-19 group overlaps the other two, so confirm that this pooled
# average is the intended measure.
us %>%
  # "Total U.S." is the national aggregate, not a state; keep it out of the
  # state ranking.
  filter(State != "Total U.S.") %>%
  group_by(State) %>%
  summarise(avg_rate = mean(State.Rate)) %>%
  # A negative n selects the smallest values; the ranking column is named
  # explicitly instead of letting top_n() pick the last column.
  top_n(-5, wt = avg_rate) %>%
  # reorder() sorts the bars from lowest to highest rate; a plain character
  # x axis would be drawn alphabetically regardless of row order.
  ggplot(mapping = aes(
    x = reorder(State, avg_rate),
    y = avg_rate,
    fill = State
  )) +
  geom_bar(stat = "identity") +
  # Print the rounded average just above each bar.
  geom_text(aes(label = round(avg_rate, digits = 1)),
    position = position_dodge(width = 0.5), vjust = -0.2
  ) +
  labs(
    x = "State", y = "Birth Rate (per 1,000)",
    title = "5 States with Lowest Average Teen Birth Rates\nFrom 1990 - 2016"
  ) +
  # The fill colour duplicates the x axis, so the legend is hidden.
  theme(
    legend.position = "none",
    axis.title = element_text(face = "bold")
  ) +
  scale_y_continuous(limits = c(0, 40))
## Selecting by ave.Rate
Top 3 States' Performance Over Time
# Yearly trend for the three states with the highest average rate:
# Mississippi, New Mexico and Texas.
us %>%
  filter(State %in% c("New Mexico", "Mississippi", "Texas")) %>%
  group_by(State, Year) %>%
  # One value per state and year: the mean of that state's rows for the year.
  summarise(avg_rate = mean(State.Rate)) %>%
  ggplot(aes(Year, avg_rate, group = State, color = State)) +
  geom_line(size = 1.2, alpha = 0.6) +
  labs(
    title = "Top 3 States with the Highest Teen Birth Rate\nFrom 1990 - 2016",
    x = "Year",
    y = "Birth Rate (per 1,000)"
  ) +
  # Label the x axis every five years.
  scale_x_continuous(breaks = seq(1990, 2016, by = 5))
# National teen birth rate over time, one line per age group.
us %>%
  group_by(Age.Group, Year) %>%
  # Collapse the rows of each age group and year to a single national value.
  summarise(avg_rate = mean(U.S..Birth.Rate)) %>%
  ggplot(aes(Year, avg_rate, group = Age.Group, color = Age.Group)) +
  geom_line(size = 1.2, alpha = 0.8) +
  labs(
    title = "US Teen Birth Rate\nFrom 1990 - 2016 by Age Group",
    x = "Year",
    y = "Birth Rate (per 1,000)"
  ) +
  # Label the x axis every five years.
  scale_x_continuous(breaks = seq(1990, 2016, by = 5))
Highest, Lowest, and National Teen Birth Rates
# Compare the highest-rate state (Mississippi) and the lowest-rate state
# (New Hampshire) with the national series ("Total U.S.") over time.
us %>%
  filter(State %in% c("Total U.S.", "Mississippi", "New Hampshire")) %>%
  group_by(State, Year) %>%
  # One value per series and year: the mean of that series' rows for the year.
  summarise(avg_rate = mean(State.Rate)) %>%
  ggplot(aes(Year, avg_rate, group = State, color = State)) +
  geom_line(size = 1.2, alpha = 0.6) +
  labs(
    title = "Highest & Lowest States Teen Birth Rate\nCompared to National Rate From 1990 - 2016",
    x = "Year",
    y = "Birth Rate (per 1,000)"
  ) +
  # Centre the two-line title over the panel.
  theme(plot.title = element_text(hjust = 0.5)) +
  # Label the x axis every five years.
  scale_x_continuous(breaks = seq(1990, 2016, by = 5))