Setup

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.3.0      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Get Data

# Read the 2003-2006 natality export (tab-delimited text) into a tibble.
race_region_0306 <- read_delim(
  file = "~/Downloads/Natality, 2003-2006 (1).txt",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 428 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): Notes, Census Region, Census Region Code, Mother's Hispanic Origin,...
## dbl (3): Year, Year Code, Births
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the 2007-2021 natality export (tab-delimited text) into a tibble.
race_region_0721 <- read_delim(
  file = "~/Downloads/Natality, 2007-2021 (1).txt",
  delim = "\t",
  escape_double = FALSE,
  trim_ws = TRUE
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
## Rows: 158 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): Notes, Census Region, Census Region Code, Mother's Hispanic Origin,...
## dbl (3): Year, Year Code, Births
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stack the two period extracts (same 12 columns in each) into one table.
race_region_0321 <- rbind(race_region_0306, race_region_0721)

Look

# Print a compact column-by-column overview of the combined table.
race_region_0321 %>%
  glimpse()
## Rows: 586
## Columns: 12
## $ Notes                           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ `Census Region`                 <chr> "Census Region 1: Northeast", "Census …
## $ `Census Region Code`            <chr> "CENS-R1", "CENS-R1", "CENS-R1", "CENS…
## $ `Mother's Hispanic Origin`      <chr> "Hispanic or Latino", "Hispanic or Lat…
## $ `Mother's Hispanic Origin Code` <chr> "2135-2", "2135-2", "2135-2", "2135-2"…
## $ `Age of Mother 9`               <chr> "Under 15 years", "Under 15 years", "U…
## $ `Age of Mother 9 Code`          <chr> "15", "15", "15", "15", "15-19", "15-1…
## $ Year                            <dbl> 2003, 2004, 2005, 2006, 2003, 2004, 20…
## $ `Year Code`                     <dbl> 2003, 2004, 2005, 2006, 2003, 2004, 20…
## $ Births                          <dbl> 257, 257, 263, 251, 14691, 15072, 1524…
## $ `Female Population`             <chr> "Not Available", "Not Available", "Not…
## $ `Fertility Rate`                <chr> "Not Available", "Not Available", "Not…

Rename and Select

# Keep only the seven analysis columns, giving the long export headers short
# names in the same step: select() renames a column when passed `new = "old"`.
race_region_0321 <- race_region_0321 %>%
  select(
    Origin = "Mother's Hispanic Origin",
    Year,
    Region = "Census Region Code",
    Age = "Age of Mother 9 Code",
    Fpop = "Female Population",
    Births,
    Rate = "Fertility Rate"
  )
glimpse(race_region_0321)
## Rows: 586
## Columns: 7
## $ Origin <chr> "Hispanic or Latino", "Hispanic or Latino", "Hispanic or Latino…
## $ Year   <dbl> 2003, 2004, 2005, 2006, 2003, 2004, 2005, 2006, 2003, 2004, 200…
## $ Region <chr> "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1…
## $ Age    <chr> "15", "15", "15", "15", "15-19", "15-19", "15-19", "15-19", "20…
## $ Fpop   <chr> "Not Available", "Not Available", "Not Available", "Not Availab…
## $ Births <dbl> 257, 257, 263, 251, 14691, 15072, 15249, 16136, 32675, 33102, 3…
## $ Rate   <chr> "Not Available", "Not Available", "Not Available", "Not Availab…

Recode

# Shorten the region and origin labels, convert the population and rate
# columns to numbers, and discard rows that cannot be used.
race_region_0321 <- race_region_0321 %>%
  mutate(
    # Map the four census-region codes to two-letter labels; any other value
    # (including NA) is passed through unchanged.
    Region = case_when(
      Region == "CENS-R1" ~ "NE",
      Region == "CENS-R2" ~ "MW",
      Region == "CENS-R3" ~ "SO",
      Region == "CENS-R4" ~ "WE",
      TRUE ~ Region
    ),
    Origin = if_else(Origin == "Hispanic or Latino", "HisLat", Origin),
    # as.numeric() turns non-numeric placeholders such as "Not Available"
    # into NA, which is what triggers the coercion warnings below.
    Fpop = as.numeric(Fpop),
    # Rescale the reported rate by 1/1000 (presumably per-1,000 women to
    # per-woman -- confirm against the export's documentation).
    Rate = as.numeric(Rate) / 1000
  ) %>%
  filter(Origin != "Not Reported") %>%
  # Remove every row that still has an NA in any of the seven columns.
  drop_na()
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
head(race_region_0321)
## # A tibble: 6 × 7
##   Origin  Year Region Age     Fpop Births   Rate
##   <chr>  <dbl> <chr>  <chr>  <dbl>  <dbl>  <dbl>
## 1 HisLat  2003 NE     15-19 233887  14691 0.0628
## 2 HisLat  2004 NE     15-19 242902  15072 0.0620
## 3 HisLat  2005 NE     15-19 255348  15249 0.0597
## 4 HisLat  2006 NE     15-19 266240  16136 0.0606
## 5 HisLat  2003 NE     20-24 248961  32675 0.131 
## 6 HisLat  2004 NE     20-24 251248  33102 0.132

First Plot: Plot the yearly rate for age group 25 - 29 in a grid by Origin and Region

# Yearly rate for mothers aged 25-29, one panel per Origin (rows) and
# Region (columns).
# ggtitle() must be added to the plot with `+`; called as a separate statement
# it only prints a labels object and the figure is drawn without a title.
race_region_0321 %>%
  filter(Age == "25-29") %>%
  ggplot(aes(x = Year, y = Rate)) +
  geom_point() +
  facet_grid(Origin ~ Region) +
  ggtitle("TS Plot of Rate for 25-29 by Origin and Region")

Flip the Grid

# Same 25-29 rate series with the grid transposed: Region in rows, Origin in
# columns.
# ggtitle() must be added to the plot with `+`; called as a separate statement
# it only prints a labels object and the figure is drawn without a title.
race_region_0321 %>%
  filter(Age == "25-29") %>%
  ggplot(aes(x = Year, y = Rate)) +
  geom_point() +
  facet_grid(Region ~ Origin) +
  ggtitle("TS Plot of Rate for 25-29 by Origin and Region")

National TFR: Create a plot showing the TFR for the whole country by Origin

# National TFR by Year and Origin.
# Step 1: pool births and female population over regions within each
#         Year x Origin x Age cell and recompute the age-specific rate.
# Step 2: sum the age-specific rates within Year x Origin and multiply by 5
#         (the age codes visible in the data are five-year bands).
# `.groups` is set explicitly so the grouping left after each summarize() is
# intentional and no "grouped output" message is emitted.
# The title is attached with `+` so that it is part of g1 and therefore shows
# up in the interactive plotly version as well.
g1 <- race_region_0321 %>%
  group_by(Year, Origin, Age) %>%
  summarize(
    Births = sum(Births),
    Fpop = sum(Fpop),
    .groups = "drop_last"
  ) %>%
  mutate(Rate = Births / Fpop) %>%
  summarize(TFR = sum(Rate) * 5, .groups = "drop") %>%
  ggplot(aes(x = Year, y = TFR, color = Origin)) +
  geom_point() +
  ggtitle("National TFR by Year and Origin")
ggplotly(g1)

Create a TFR plot by Origin and Region using plotly.

# Regional TFR by Year and Origin, one panel per Origin (rows) and Region
# (columns).
# Step 1: total births and female population within each
#         Year x Region x Origin x Age cell and recompute the rate.
# Step 2: sum the age-specific rates within Year x Region x Origin and
#         multiply by 5 (the age codes visible in the data are five-year
#         bands).
# `.groups` is set explicitly so the grouping left after each summarize() is
# intentional and no "grouped output" message is emitted.
# The title is attached with `+` so that it is part of g2 and therefore shows
# up in the interactive plotly version as well.
g2 <- race_region_0321 %>%
  group_by(Year, Region, Origin, Age) %>%
  summarize(
    Births = sum(Births),
    Fpop = sum(Fpop),
    .groups = "drop_last"
  ) %>%
  mutate(Rate = Births / Fpop) %>%
  summarize(TFR = sum(Rate) * 5, .groups = "drop") %>%
  ggplot(aes(x = Year, y = TFR, color = Origin)) +
  geom_point() +
  facet_grid(Origin ~ Region) +
  ggtitle("Regional TFR by Year and Origin")
ggplotly(g2)