library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Load the first natality extract (tab-delimited text; the file name
# indicates it covers 2003-2006).
eth_region_0306 <- read_delim(
  file = "~/R/Natality, 2003-2006 F1.txt",
  delim = "\t",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 427 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: "\t"
## chr (9): Notes, Mother's Hispanic Origin, Mother's Hispanic Origin Code, Age...
## dbl (3): Year, Year Code, Births
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the second natality extract (tab-delimited text; the file name
# indicates it covers 2007-2020).
eth_region_0720 <- read_delim(
  file = "~/R/Natality, 2007-2020 F1.txt",
  delim = "\t",
  trim_ws = TRUE,
  escape_double = FALSE
)
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 1455 Columns: 12
## -- Column specification --------------------------------------------------------
## Delimiter: "\t"
## chr (9): Notes, Mother's Hispanic Origin, Mother's Hispanic Origin Code, Age...
## dbl (3): Year, Year Code, Births
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Stack the two extracts row-wise into a single table, then print a
# column-by-column overview of the result.
eth_region_0320 <- rbind(
  eth_region_0306,
  eth_region_0720
)
glimpse(eth_region_0320)
## Rows: 1,882
## Columns: 12
## $ Notes <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA~
## $ `Mother's Hispanic Origin` <chr> "Hispanic or Latino", "Hispanic or Lat~
## $ `Mother's Hispanic Origin Code` <chr> "2135-2", "2135-2", "2135-2", "2135-2"~
## $ `Age of Mother 9` <chr> "Under 15 years", "Under 15 years", "U~
## $ `Age of Mother 9 Code` <chr> "15", "15", "15", "15", "15", "15", "1~
## $ Year <dbl> 2003, 2003, 2003, 2003, 2004, 2004, 20~
## $ `Year Code` <dbl> 2003, 2003, 2003, 2003, 2004, 2004, 20~
## $ `Census Region` <chr> "Census Region 1: Northeast", "Census ~
## $ `Census Region Code` <chr> "CENS-R1", "CENS-R2", "CENS-R3", "CENS~
## $ Births <dbl> 257, 190, 936, 973, 257, 192, 956, 951~
## $ `Female Population` <chr> "Not Available", "Not Available", "Not~
## $ `Fertility Rate` <chr> "Not Available", "Not Available", "Not~
# Rename and select ----
# Keep only the seven analysis columns, giving the verbose source columns
# short names in the same step (select() renames with `new = old`).
eth_region_0320 <- eth_region_0320 %>%
  select(
    Ethnicity = `Mother's Hispanic Origin`,
    Year,
    Region = `Census Region Code`,
    Age = `Age of Mother 9 Code`,
    Fpop = `Female Population`,
    Births,
    Rate = `Fertility Rate`
  )
glimpse(eth_region_0320)
## Rows: 1,882
## Columns: 7
## $ Ethnicity <chr> "Hispanic or Latino", "Hispanic or Latino", "Hispanic or Lat~
## $ Year <dbl> 2003, 2003, 2003, 2003, 2004, 2004, 2004, 2004, 2005, 2005, ~
## $ Region <chr> "CENS-R1", "CENS-R2", "CENS-R3", "CENS-R4", "CENS-R1", "CENS~
## $ Age <chr> "15", "15", "15", "15", "15", "15", "15", "15", "15", "15", ~
## $ Fpop <chr> "Not Available", "Not Available", "Not Available", "Not Avai~
## $ Births <dbl> 257, 190, 936, 973, 257, 192, 956, 951, 263, 216, 989, 998, ~
## $ Rate <chr> "Not Available", "Not Available", "Not Available", "Not Avai~
# Recode ----
# Shorten the census region codes, convert the population and rate columns
# to numbers, and keep only complete rows with a reported ethnicity.
eth_region_0320 <- eth_region_0320 %>%
  mutate(
    Region = case_when(
      Region == "CENS-R1" ~ "NE",
      Region == "CENS-R2" ~ "MW",
      Region == "CENS-R3" ~ "SO",
      Region == "CENS-R4" ~ "WE",
      TRUE ~ Region  # any other code is left untouched
    ),
    # Non-numeric entries (e.g. "Not Available") turn into NA here, which is
    # what triggers the coercion warnings; drop_na() removes those rows.
    Fpop = as.numeric(Fpop),
    # NOTE(review): dividing by 1000 presumably converts a per-1,000 rate
    # into a per-woman rate -- confirm against the data source.
    Rate = as.numeric(Rate) / 1000
  ) %>%
  filter(Ethnicity != "Not Reported") %>%
  drop_na()
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
head(eth_region_0320, n = 6L)
## # A tibble: 6 x 7
## Ethnicity Year Region Age Fpop Births Rate
## <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Hispanic or Latino 2003 NE 15-19 233887 14691 0.0628
## 2 Hispanic or Latino 2003 MW 15-19 148518 12041 0.0811
## 3 Hispanic or Latino 2003 SO 15-19 533647 47220 0.0885
## 4 Hispanic or Latino 2003 WE 15-19 723943 54572 0.0754
## 5 Hispanic or Latino 2004 NE 15-19 242902 15072 0.0620
## 6 Hispanic or Latino 2004 MW 15-19 153522 12406 0.0808
# First plot ----
# Rate against Year for mothers in the 25-29 age group, one panel per
# ethnicity (rows) and region (columns).
ggplot(
  data = filter(eth_region_0320, Age == "25-29"),
  mapping = aes(x = Year, y = Rate)
) +
  geom_point() +
  facet_grid(rows = vars(Ethnicity), cols = vars(Region)) +
  ggtitle("TS Plot of Rate for 25-29 by Ethnicity and Region")
# Flip the grid ----
# Same data as the first plot with the facets transposed: regions on the
# rows, ethnicities on the columns.
ggplot(
  data = filter(eth_region_0320, Age == "25-29"),
  mapping = aes(x = Year, y = Rate)
) +
  geom_point() +
  facet_grid(rows = vars(Region), cols = vars(Ethnicity)) +
  ggtitle("TS Plot of Rate for 25-29 by Ethnicity and Region")
# National TFR by ethnicity ----
# Pool births and female population across regions for every
# year / ethnicity / age group, derive the age-specific rate, then sum the
# rates over the age groups and multiply by 5 to get the TFR.
# NOTE(review): the factor 5 presumably reflects five-year age bands --
# confirm that every age group kept after drop_na() spans five years.
g1 <- eth_region_0320 %>%
  group_by(Year, Ethnicity, Age) %>%
  summarize(
    Births = sum(Births),
    Fpop = sum(Fpop),
    # Drop only the Age grouping so the next summarize() adds up the
    # age-specific rates within each Year / Ethnicity pair.
    .groups = "drop_last"
  ) %>%
  mutate(Rate = Births / Fpop) %>%
  summarize(TFR = sum(Rate) * 5, .groups = "drop") %>%
  ggplot(aes(x = Year, y = TFR, color = Ethnicity)) +
  geom_point() +
  # The title has to be chained with `+`: as a stand-alone statement
  # ggtitle() merely prints a labels object and never reaches the plot.
  ggtitle("National TFR by Year and Ethnicity")
ggplotly(g1)
# TFR by ethnicity and region ----
# Same computation as the national TFR, but kept separate per census region:
# total births and female population per year / region / ethnicity / age
# group, derive the age-specific rate, then sum the rates over the age
# groups and multiply by 5.
# NOTE(review): the factor 5 presumably reflects five-year age bands --
# confirm that every age group kept after drop_na() spans five years.
g2 <- eth_region_0320 %>%
  group_by(Year, Region, Ethnicity, Age) %>%
  summarize(
    Births = sum(Births),
    Fpop = sum(Fpop),
    # Drop only the Age grouping so the next summarize() adds up the
    # age-specific rates within each Year / Region / Ethnicity cell.
    .groups = "drop_last"
  ) %>%
  mutate(Rate = Births / Fpop) %>%
  summarize(TFR = sum(Rate) * 5, .groups = "drop") %>%
  ggplot(aes(x = Year, y = TFR, color = Ethnicity)) +
  geom_point(size = 0.5) +
  facet_grid(Ethnicity ~ Region) +
  # The title has to be chained with `+`: as a stand-alone statement
  # ggtitle() merely prints a labels object and never reaches the plot.
  ggtitle("Regional TFR by Year and Ethnicity")
ggplotly(g2)