Fertility 3

Harold Nelson

3/3/2022

Setup

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Get Data

race_region_0306 <- read_delim("~/Downloads/Natality, 2003-2006.txt",delim = "\t", escape_double = FALSE,trim_ws = TRUE)
## Rows: 558 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): Notes, Census Region, Census Region Code, Mother's Bridged Race, Mo...
## dbl (3): Year, Year Code, Births
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
race_region_0720 <- read_delim("~/Downloads/Natality, 2007-2020.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 1824 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): Notes, Census Region, Census Region Code, Mother's Bridged Race, Mo...
## dbl (3): Year, Year Code, Births
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
race_region_0320 = rbind(race_region_0306,race_region_0720)
## Warning: One or more parsing issues, see `problems()` for details

## Warning: One or more parsing issues, see `problems()` for details

Look

glimpse(race_region_0320)
## Rows: 2,382
## Columns: 12
## $ Notes                        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ `Census Region`              <chr> "Census Region 1: Northeast", "Census Reg…
## $ `Census Region Code`         <chr> "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1…
## $ `Mother's Bridged Race`      <chr> "American Indian or Alaska Native", "Amer…
## $ `Mother's Bridged Race Code` <chr> "1002-5", "1002-5", "1002-5", "1002-5", "…
## $ `Age of Mother 9`            <chr> "15-19 years", "15-19 years", "15-19 year…
## $ `Age of Mother 9 Code`       <chr> "15-19", "15-19", "15-19", "15-19", "20-2…
## $ Year                         <dbl> 2003, 2004, 2005, 2006, 2003, 2004, 2005,…
## $ `Year Code`                  <dbl> 2003, 2004, 2005, 2006, 2003, 2004, 2005,…
## $ Births                       <dbl> 238, 244, 243, 284, 525, 518, 526, 543, 5…
## $ `Female Population`          <chr> "12077", "12860", "13850", "14676", "1121…
## $ `Fertility Rate`             <chr> "19.71", "18.97", "17.55", "19.35", "46.8…

Rename and Select

race_region_0320 = race_region_0320 %>% 
  rename(Region = `Census Region Code`,
         Race = `Mother's Bridged Race`,
         Age = `Age of Mother 9 Code`,
         Fpop = `Female Population`,
         Rate = `Fertility Rate` ) %>% 
  select(Race, Year, Region, Age, Fpop, Births, Rate)

glimpse(race_region_0320)
## Rows: 2,382
## Columns: 7
## $ Race   <chr> "American Indian or Alaska Native", "American Indian or Alaska …
## $ Year   <dbl> 2003, 2004, 2005, 2006, 2003, 2004, 2005, 2006, 2003, 2004, 200…
## $ Region <chr> "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1…
## $ Age    <chr> "15-19", "15-19", "15-19", "15-19", "20-24", "20-24", "20-24", …
## $ Fpop   <chr> "12077", "12860", "13850", "14676", "11217", "12034", "12697", …
## $ Births <dbl> 238, 244, 243, 284, 525, 518, 526, 543, 505, 536, 546, 536, 394…
## $ Rate   <chr> "19.71", "18.97", "17.55", "19.35", "46.80", "43.04", "41.43", …

Recode

race_region_0320 = race_region_0320 %>% 
  mutate(Region = ifelse(Region == "CENS-R1","NE",Region),
         Region = ifelse(Region == "CENS-R2","MW",Region),
         Region = ifelse(Region == "CENS-R3","SO",Region),
         Region = ifelse(Region == "CENS-R4","WE",Region),
         Race = ifelse(Race == "American Indian or Alaska Native","AmInd",Race),
         Race = ifelse(Race == "Asian or Pacific Islander","API",Race),
         Race = ifelse(Race == "Black or African American","Black",Race),
         Fpop = as.numeric(Fpop),
         Rate = as.numeric(Rate)/1000) %>% 
filter(Race != "Not Reported") %>% 
drop_na()
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
head(race_region_0320)
## # A tibble: 6 × 7
##   Race   Year Region Age    Fpop Births   Rate
##   <chr> <dbl> <chr>  <chr> <dbl>  <dbl>  <dbl>
## 1 AmInd  2003 NE     15-19 12077    238 0.0197
## 2 AmInd  2004 NE     15-19 12860    244 0.0190
## 3 AmInd  2005 NE     15-19 13850    243 0.0176
## 4 AmInd  2006 NE     15-19 14676    284 0.0194
## 5 AmInd  2003 NE     20-24 11217    525 0.0468
## 6 AmInd  2004 NE     20-24 12034    518 0.0430

First Plot

Plot the yearly Rate for age group 25-29 in a grid by Race and Region.

Solution

race_region_0320 %>% 
  filter(Age == "25-29") %>% 
  ggplot(aes(x= Year, y = Rate)) +
  geom_point() +
  facet_grid(Race~Region) +
  ggtitle("TS Plot of Rate for 25-29 by Race and Region")

Flip the Grid

Solution

race_region_0320 %>% 
  filter(Age == "25-29") %>% 
  ggplot(aes(x= Year, y = Rate)) +
  geom_point() +
  facet_grid(Region~Race) +
  ggtitle("TS Plot of Rate for 25-29 by Race and Region")

National TFR by Race

Create a plot showing the TFR for the whole country by Race.

Solution

g1 = race_region_0320 %>% 
  group_by(Year,Race,Age) %>% 
  summarize(Births = sum(Births),
            Fpop = sum(Fpop)) %>% 
  mutate(Rate = Births/Fpop)%>% 
  summarize(TFR = sum(Rate) * 5) %>% 
  ungroup() %>% 
  ggplot(aes(x = Year,y = TFR, color = Race)) +
  geom_point()
## `summarise()` has grouped output by 'Year', 'Race'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'Year'. You can override using the `.groups` argument.
  ggtitle("National TFR by Year and Race")
## $title
## [1] "National TFR by Year and Race"
## 
## attr(,"class")
## [1] "labels"
ggplotly(g1)

Race and Region

Create a plot showing the TFR by Race and Region. Use plotly.

Solution

g2 = race_region_0320 %>% 
  group_by(Year,Region,Race,Age) %>% 
  summarize(Births = sum(Births),
            Fpop = sum(Fpop)) %>% 
  mutate(Rate = Births/Fpop)%>% 
  summarize(TFR = sum(Rate) * 5) %>% 
  ungroup() %>% 
  ggplot(aes(x = Year,y = TFR, color = Race)) +
  geom_point() +
  facet_grid(Race~Region)
## `summarise()` has grouped output by 'Year', 'Region', 'Race'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'Year', 'Region'. You can override using the `.groups` argument.
  ggtitle("Regional TFR by Year and Race")
## $title
## [1] "Regional TFR by Year and Race"
## 
## attr(,"class")
## [1] "labels"
ggplotly(g2)

Numerators and Denominators

Can we trust this data? The numerators come from birth certificates. The denominators come from the census. Is it possible that some women report themselves as native americans on the census and report themselves as some other race when they give birth?

This is a constant problem with statistical analysis of data involving race.