library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.2
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Read the tab-delimited natality extract named "Natality, 2003-2006 (1).txt".
MHO_region_0306 <- read_delim(
  file = "C:/Users/grifw/Downloads/Natality, 2003-2006 (1).txt",
  delim = "\t",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 428 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): Notes, Census Region, Census Region Code, Mother's Hispanic Origin,...
## dbl (3): Year, Year Code, Births
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the tab-delimited natality extract named "Natality, 2007-2021.txt".
MHO_region_0720 <- read_delim(
  file = "C:/Users/grifw/Downloads/Natality, 2007-2021.txt",
  delim = "\t",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 158 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (9): Notes, Census Region, Census Region Code, Mother's Hispanic Origin,...
## dbl (3): Year, Year Code, Births
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stack the two natality extracts, then keep only the analysis columns,
# giving the long source headers short names as they are selected.
MHO_region_0320 <- rbind(MHO_region_0306, MHO_region_0720) %>%
  select(
    MHOrigin = `Mother's Hispanic Origin`,
    Year,
    Region = `Census Region Code`,
    Age = `Age of Mother 9 Code`,
    Fpop = `Female Population`,
    Births,
    Rate = `Fertility Rate`
  )

# Quick structural check of the combined table.
glimpse(MHO_region_0320)
## Rows: 586
## Columns: 7
## $ MHOrigin <chr> "Hispanic or Latino", "Hispanic or Latino", "Hispanic or Lati…
## $ Year <dbl> 2003, 2004, 2005, 2006, 2003, 2004, 2005, 2006, 2003, 2004, 2…
## $ Region <chr> "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1", "CENS-R1", "CENS-…
## $ Age <chr> "15", "15", "15", "15", "15-19", "15-19", "15-19", "15-19", "…
## $ Fpop <chr> "Not Available", "Not Available", "Not Available", "Not Avail…
## $ Births <dbl> 257, 257, 263, 251, 14691, 15072, 15249, 16136, 32675, 33102,…
## $ Rate <chr> "Not Available", "Not Available", "Not Available", "Not Avail…
# Shorten the region and origin labels, convert the population and rate
# columns to numbers, and keep only complete rows with a stated origin.
MHO_region_0320 <- MHO_region_0320 %>%
  mutate(
    Region = case_when(
      Region == "CENS-R1" ~ "NE",
      Region == "CENS-R2" ~ "MW",
      Region == "CENS-R3" ~ "SO",
      Region == "CENS-R4" ~ "WE",
      TRUE ~ Region
    ),
    MHOrigin = case_when(
      MHOrigin == "Hispanic or Latino" ~ "HL",
      MHOrigin == "Not Hispanic or Latino" ~ "NHL",
      TRUE ~ MHOrigin
    ),
    # Non-numeric placeholders such as "Not Available" become NA here, which
    # is what triggers the coercion warnings recorded below.
    Fpop = as.numeric(Fpop),
    # NOTE(review): dividing by 1000 presumably turns a per-1,000 rate into a
    # per-woman rate -- confirm against the data source.
    Rate = as.numeric(Rate) / 1000
  ) %>%
  filter(MHOrigin != "Unknown or Not Stated") %>%
  drop_na()
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
head(MHO_region_0320)
## # A tibble: 6 × 7
## MHOrigin Year Region Age Fpop Births Rate
## <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 HL 2003 NE 15-19 233887 14691 0.0628
## 2 HL 2004 NE 15-19 242902 15072 0.0620
## 3 HL 2005 NE 15-19 255348 15249 0.0597
## 4 HL 2006 NE 15-19 266240 16136 0.0606
## 5 HL 2003 NE 20-24 248961 32675 0.131
## 6 HL 2004 NE 20-24 251248 33102 0.132
# Rate by year for the 25-29 age group: one panel row per origin group and
# one panel column per region.
ggplot(
  data = filter(MHO_region_0320, Age == "25-29"),
  mapping = aes(x = Year, y = Rate)
) +
  geom_point() +
  facet_grid(rows = vars(MHOrigin), cols = vars(Region)) +
  ggtitle("TS Plot of Rate for 25-29 by Mother's Hispanic Origin and Region")

# Rate by year for the 25-29 age group: one panel row per region and one
# panel column per origin group.
ggplot(
  data = filter(MHO_region_0320, Age == "25-29"),
  mapping = aes(x = Year, y = Rate)
) +
  geom_point() +
  facet_grid(rows = vars(Region), cols = vars(MHOrigin)) +
  ggtitle("TS Plot of Rate for 25-29 by Mother's Hispanic Origin and Region")

# National TFR by year and mother's Hispanic origin.
# Births and female population are pooled over regions for every
# Year x MHOrigin x Age cell, the age-specific rate is recomputed from the
# pooled totals, and the rates are summed within each Year x MHOrigin.
g1 <- MHO_region_0320 %>%
  group_by(Year, MHOrigin, Age) %>%
  summarize(
    Births = sum(Births),
    Fpop = sum(Fpop),
    # Keep the Year x MHOrigin grouping so the next summarize() sums over Age.
    .groups = "drop_last"
  ) %>%
  mutate(Rate = Births / Fpop) %>%
  # NOTE(review): the factor 5 is presumably the width of the five-year age
  # bands -- confirm.
  summarize(TFR = sum(Rate) * 5, .groups = "drop") %>%
  ggplot(aes(x = Year, y = TFR, color = MHOrigin)) +
  geom_point() +
  # The title has to be joined to the plot with `+`; called as a separate
  # statement it only prints a labels object and leaves g1 untitled.
  ggtitle("National TFR by Year and Mother's Hispanic Origin")

# Interactive version of the titled plot.
ggplotly(g1)
# Regional TFR by year and mother's Hispanic origin.
# Births and female population are totalled for every
# Year x Region x MHOrigin x Age cell, the age-specific rate is recomputed
# from those totals, and the rates are summed within each
# Year x Region x MHOrigin.
g2 <- MHO_region_0320 %>%
  group_by(Year, Region, MHOrigin, Age) %>%
  summarize(
    Births = sum(Births),
    Fpop = sum(Fpop),
    # Keep the Year x Region x MHOrigin grouping so the next summarize()
    # sums over Age.
    .groups = "drop_last"
  ) %>%
  mutate(Rate = Births / Fpop) %>%
  # NOTE(review): the factor 5 is presumably the width of the five-year age
  # bands -- confirm.
  summarize(TFR = sum(Rate) * 5, .groups = "drop") %>%
  ggplot(aes(x = Year, y = TFR, color = MHOrigin)) +
  geom_point() +
  facet_grid(MHOrigin ~ Region) +
  # The title has to be joined to the plot with `+`; called as a separate
  # statement it only prints a labels object and leaves g2 untitled.
  ggtitle("Regional TFR by Year and Mother's Hispanic Origin")

# Interactive version of the titled plot.
ggplotly(g2)