Harold Nelson
3/3/2022
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.5 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.0.2 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
race_region_0306 <- read_delim("~/Downloads/Natality, 2003-2006.txt",delim = "\t", escape_double = FALSE,trim_ws = TRUE)
## Rows: 558 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): Notes, Census Region, Census Region Code, Mother's Bridged Race, Mo...
## dbl (3): Year, Year Code, Births
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
race_region_0720 <- read_delim("~/Downloads/Natality, 2007-2020.txt", delim = "\t", escape_double = FALSE, trim_ws = TRUE)
## Rows: 1824 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): Notes, Census Region, Census Region Code, Mother's Bridged Race, Mo...
## dbl (3): Year, Year Code, Births
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: One or more parsing issues, see `problems()` for details
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 2,382
## Columns: 12
## $ Notes <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ `Census Region` <chr> "Census Region 1: Northeast", "Census Reg…
## $ `Census Region Code` <chr> "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1…
## $ `Mother's Bridged Race` <chr> "American Indian or Alaska Native", "Amer…
## $ `Mother's Bridged Race Code` <chr> "1002-5", "1002-5", "1002-5", "1002-5", "…
## $ `Age of Mother 9` <chr> "15-19 years", "15-19 years", "15-19 year…
## $ `Age of Mother 9 Code` <chr> "15-19", "15-19", "15-19", "15-19", "20-2…
## $ Year <dbl> 2003, 2004, 2005, 2006, 2003, 2004, 2005,…
## $ `Year Code` <dbl> 2003, 2004, 2005, 2006, 2003, 2004, 2005,…
## $ Births <dbl> 238, 244, 243, 284, 525, 518, 526, 543, 5…
## $ `Female Population` <chr> "12077", "12860", "13850", "14676", "1121…
## $ `Fertility Rate` <chr> "19.71", "18.97", "17.55", "19.35", "46.8…
race_region_0320 = race_region_0320 %>%
rename(Region = `Census Region Code`,
Race = `Mother's Bridged Race`,
Age = `Age of Mother 9 Code`,
Fpop = `Female Population`,
Rate = `Fertility Rate` ) %>%
select(Race, Year, Region, Age, Fpop, Births, Rate)
glimpse(race_region_0320)
## Rows: 2,382
## Columns: 7
## $ Race <chr> "American Indian or Alaska Native", "American Indian or Alaska …
## $ Year <dbl> 2003, 2004, 2005, 2006, 2003, 2004, 2005, 2006, 2003, 2004, 200…
## $ Region <chr> "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1…
## $ Age <chr> "15-19", "15-19", "15-19", "15-19", "20-24", "20-24", "20-24", …
## $ Fpop <chr> "12077", "12860", "13850", "14676", "11217", "12034", "12697", …
## $ Births <dbl> 238, 244, 243, 284, 525, 518, 526, 543, 505, 536, 546, 536, 394…
## $ Rate <chr> "19.71", "18.97", "17.55", "19.35", "46.80", "43.04", "41.43", …
race_region_0320 = race_region_0320 %>%
mutate(Region = ifelse(Region == "CENS-R1","NE",Region),
Region = ifelse(Region == "CENS-R2","MW",Region),
Region = ifelse(Region == "CENS-R3","SO",Region),
Region = ifelse(Region == "CENS-R4","WE",Region),
Race = ifelse(Race == "American Indian or Alaska Native","AmInd",Race),
Race = ifelse(Race == "Asian or Pacific Islander","API",Race),
Race = ifelse(Race == "Black or African American","Black",Race),
Fpop = as.numeric(Fpop),
Rate = as.numeric(Rate)/1000) %>%
filter(Race != "Not Reported") %>%
drop_na()
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## # A tibble: 6 × 7
## Race Year Region Age Fpop Births Rate
## <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 AmInd 2003 NE 15-19 12077 238 0.0197
## 2 AmInd 2004 NE 15-19 12860 244 0.0190
## 3 AmInd 2005 NE 15-19 13850 243 0.0176
## 4 AmInd 2006 NE 15-19 14676 284 0.0194
## 5 AmInd 2003 NE 20-24 11217 525 0.0468
## 6 AmInd 2004 NE 20-24 12034 518 0.0430
Plot the yearly Rate for age group 25-29 in a grid by Race and Region.
race_region_0320 %>%
filter(Age == "25-29") %>%
ggplot(aes(x= Year, y = Rate)) +
geom_point() +
facet_grid(Race~Region) +
ggtitle("TS Plot of Rate for 25-29 by Race and Region")
race_region_0320 %>%
filter(Age == "25-29") %>%
ggplot(aes(x= Year, y = Rate)) +
geom_point() +
facet_grid(Region~Race) +
ggtitle("TS Plot of Rate for 25-29 by Race and Region")
Create a plot showing the TFR for the whole country by Race.
g1 = race_region_0320 %>%
group_by(Year,Race,Age) %>%
summarize(Births = sum(Births),
Fpop = sum(Fpop)) %>%
mutate(Rate = Births/Fpop)%>%
summarize(TFR = sum(Rate) * 5) %>%
ungroup() %>%
ggplot(aes(x = Year,y = TFR, color = Race)) +
geom_point()
## `summarise()` has grouped output by 'Year', 'Race'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'Year'. You can override using the `.groups` argument.
## $title
## [1] "National TFR by Year and Race"
##
## attr(,"class")
## [1] "labels"
Create a plot showing the TFR by Race and Region. Use plotly.
g2 = race_region_0320 %>%
group_by(Year,Region,Race,Age) %>%
summarize(Births = sum(Births),
Fpop = sum(Fpop)) %>%
mutate(Rate = Births/Fpop)%>%
summarize(TFR = sum(Rate) * 5) %>%
ungroup() %>%
ggplot(aes(x = Year,y = TFR, color = Race)) +
geom_point() +
facet_grid(Race~Region)
## `summarise()` has grouped output by 'Year', 'Region', 'Race'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'Year', 'Region'. You can override using the `.groups` argument.
## $title
## [1] "Regional TFR by Year and Race"
##
## attr(,"class")
## [1] "labels"
Can we trust this data? The numerators come from birth certificates. The denominators come from the census. Is it possible that some women report themselves as native americans on the census and report themselves as some other race when they give birth?
This is a constant problem with statistical analysis of data involving race.