I created four dynamic scatter plots that show the county-by-county relationships between COVID-19 mortality and:
- public housing assistance rate
- median age
- median income
- higher education attainment
The script below:
The final analysis is here
library(plotly) #make it interactive
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(tidyverse) # create the visualizations
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks plotly::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggthemes) # preset plot aesthetics
# Read covid_census.csv, keep only the columns used in the analysis, and
# assign every row to one of five equal-sized groups ranked by
# Deaths_Per_Pop_Thousand.
covid_quintile <-
  read.csv(
    file = "~/STA 518/BrookemWalters-Portfolio/Stats 518 Final Project/data/covid_census.csv"
  ) %>%
  select(
    population19E,
    householdsE,
    median_ageE,
    median_incomeE,
    bach_degree_plus_a25E,
    unemployment_rate,
    public_assist_rate,
    percent_asian,
    percent_black,
    percent_native,
    percent_pacific_islander,
    percent_white,
    percent_hispanic,
    County,
    Total_Deaths,
    Deaths_Per_Pop_Thousand
  ) %>%
  mutate(
    covid_mortality_quintile = ntile(Deaths_Per_Pop_Thousand, n = 5)
  )
1 = lowest Covid deaths per 1,000; 5 = highest Covid deaths per 1,000
# Download the US deaths time series from the CSSEGISandData COVID-19
# repository on GitHub. Only its column names are needed further down, to
# date the source note.
us_cd_deaths <- read_csv(
  file = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv"
)
## Rows: 3342 Columns: 941
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): iso2, iso3, Admin2, Province_State, Country_Region, Combined_Key
## dbl (935): UID, code3, FIPS, Lat, Long_, Population, 1/22/20, 1/23/20, 1/24/...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# The final column name of the time series serves as the "as of" date.
source_two_date <- rev(colnames(us_cd_deaths))[1]

# Build the source note once, echo it, and keep it for the plot captions.
print_sources <- paste(
  "Sources: 1) American Community Survey (ACS), 2016-2020 5-Year estimates, 2) JHU CSSE COVID-19 Data Repository as of",
  source_two_date
)
print(print_sources)
## [1] "Sources: 1) American Community Survey (ACS), 2016-2020 5-Year estimates, 2) JHU CSSE COVID-19 Data Repository as of 8/7/22"
# Treat the quintile as a categorical variable rather than an integer.
covid_quintile <- mutate(
  covid_quintile,
  covid_mortality_quintile = as.factor(covid_mortality_quintile)
)
covid_quintile <- tibble(covid_quintile)

# Export a copy of the prepared data to the Bootstrapping folder.
write.csv(
  x = covid_quintile,
  file = "~/STA 518/BrookemWalters-Portfolio/Stats 518 Final Project/Bootstrapping/covid_census_bs.csv"
)
# Chart-ready copy of the data: swap the raw column names for the
# human-readable labels that appear on axes and in tooltips.
# (new label = existing column)
covid_quintile_chart <- rename(
  covid_quintile,
  # size and demographics
  Population = population19E,
  Households = householdsE,
  `Median Age` = median_ageE,
  # socio-economic indicators
  `Median Household Income` = median_incomeE,
  `% A25+ College Grad+` = bach_degree_plus_a25E,
  `% of HH on Public Assist.` = public_assist_rate,
  `Unemployment Rate` = unemployment_rate,
  # race / ethnicity shares
  `% Asian` = percent_asian,
  `% Black` = percent_black,
  `% Native American` = percent_native,
  `% Pacific Islander` = percent_pacific_islander,
  `% White` = percent_white,
  `% Hispanic` = percent_hispanic,
  # COVID outcomes
  `Total Covid Deaths` = Total_Deaths,
  `Covid Deaths Per Thousand` = Deaths_Per_Pop_Thousand
)
# Five colours, one per mortality quintile; passed to scale_color_manual()
# in every scatter plot below.
mortal_palette <- c(
  "#999999",
  "#00798c",
  "#66a182",
  "#edae49",
  "#dc0000b2"
)
# Static scatter plot: share of households on public assistance (x) against
# COVID deaths per thousand (y). Point size follows total deaths and colour
# follows the mortality quintile.
assist_sp <- ggplot(
  data = covid_quintile_chart,
  mapping = aes(
    x = `% of HH on Public Assist.`,
    y = `Covid Deaths Per Thousand`,
    size = `Total Covid Deaths`,
    color = covid_mortality_quintile,
    # presumably surfaced as the county name in the ggplotly tooltip
    text = County
  )
) +
  geom_point(alpha = 6 / 10) +
  scale_size_continuous(range = c(2, 15)) +
  scale_color_manual(values = mortal_palette) +
  labs(
    title = "COVID-19 Mortality Rate by Percent of Households on Assistance",
    subtitle = "Michigan Counties, deaths per 1000",
    caption = print_sources,
    color = "Quintile",
    size = NULL
  ) +
  theme_fivethirtyeight() +
  # applied after the preset theme so the axis-title setting takes effect
  theme(axis.title = element_text())
# Save the static plot to the "Plots" folder.
# A ggplot is only rendered when it is printed, and the JPEG file is only
# finalised once its graphics device is closed. Print explicitly (top-level
# autoprint does not happen under source()) and close the device afterwards
# so the file is flushed and no device is left open.
jpeg(
  filename = "~/STA 518/BrookemWalters-Portfolio/Stats 518 Final Project/Plots/assist_sp.jpeg",
  width = 1000, height = 650
)
print(assist_sp)
invisible(dev.off())

# Display the interactive version
ia_assist_sp <- ggplotly(assist_sp)
## Warning: plotly.js does not (yet) support horizontal legend items
## You can track progress here:
## https://github.com/plotly/plotly.js/issues/53
ia_assist_sp
# Static scatter plot: median age (x) against COVID deaths per thousand (y).
# Point size follows total deaths and colour follows the mortality quintile.
age_sp <- ggplot(
  data = covid_quintile_chart,
  mapping = aes(
    x = `Median Age`,
    y = `Covid Deaths Per Thousand`,
    size = `Total Covid Deaths`,
    color = covid_mortality_quintile,
    # presumably surfaced as the county name in the ggplotly tooltip
    text = County
  )
) +
  geom_point(alpha = 6 / 10) +
  scale_size_continuous(range = c(2, 15)) +
  scale_color_manual(values = mortal_palette) +
  labs(
    title = "COVID-19 Mortality Rate by Median Age",
    subtitle = "Michigan Counties, deaths per 1000",
    caption = print_sources,
    color = "Quintile",
    size = NULL
  ) +
  theme_fivethirtyeight() +
  # applied after the preset theme so the axis-title setting takes effect
  theme(axis.title = element_text())
# Save the static plot to the "Plots" folder.
# A ggplot is only rendered when it is printed, and the JPEG file is only
# finalised once its graphics device is closed. Print explicitly (top-level
# autoprint does not happen under source()) and close the device afterwards
# so the file is flushed and no device is left open.
jpeg(
  filename = "~/STA 518/BrookemWalters-Portfolio/Stats 518 Final Project/Plots/age_sp.jpeg",
  width = 1000, height = 650
)
print(age_sp)
invisible(dev.off())

# Display the interactive version
ia_age_sp <- ggplotly(age_sp)
## Warning: plotly.js does not (yet) support horizontal legend items
## You can track progress here:
## https://github.com/plotly/plotly.js/issues/53
ia_age_sp
# Static scatter plot: the `% A25+ College Grad+` column (x) against COVID
# deaths per thousand (y). Point size follows total deaths and colour follows
# the mortality quintile.
ed_sp <- ggplot(
  data = covid_quintile_chart,
  mapping = aes(
    x = `% A25+ College Grad+`,
    y = `Covid Deaths Per Thousand`,
    size = `Total Covid Deaths`,
    color = covid_mortality_quintile,
    # presumably surfaced as the county name in the ggplotly tooltip
    text = County
  )
) +
  geom_point(alpha = 6 / 10) +
  scale_size_continuous(range = c(2, 15)) +
  scale_color_manual(values = mortal_palette) +
  labs(
    title = "COVID-19 Mortality Rate by Higher Education Attainment",
    subtitle = "Michigan Counties, deaths per 1000",
    caption = print_sources,
    color = "Quintile",
    size = NULL
  ) +
  theme_fivethirtyeight() +
  # applied after the preset theme so the axis-title setting takes effect
  theme(axis.title = element_text())
# Save the static plot to the "Plots" folder.
# A ggplot is only rendered when it is printed, and the JPEG file is only
# finalised once its graphics device is closed. Print explicitly (top-level
# autoprint does not happen under source()) and close the device afterwards
# so the file is flushed and no device is left open.
jpeg(
  filename = "~/STA 518/BrookemWalters-Portfolio/Stats 518 Final Project/Plots/ed_sp.jpeg",
  width = 1000, height = 650
)
print(ed_sp)
invisible(dev.off())

# Display the interactive version
ia_ed_sp <- ggplotly(ed_sp)
## Warning: plotly.js does not (yet) support horizontal legend items
## You can track progress here:
## https://github.com/plotly/plotly.js/issues/53
ia_ed_sp
# Static scatter plot: median household income (x, dollar-formatted) against
# COVID deaths per thousand (y). Point size follows total deaths and colour
# follows the mortality quintile.
income_sp <- ggplot(
  data = covid_quintile_chart,
  mapping = aes(
    x = `Median Household Income`,
    y = `Covid Deaths Per Thousand`,
    size = `Total Covid Deaths`,
    color = covid_mortality_quintile,
    # presumably surfaced as the county name in the ggplotly tooltip
    text = County
  )
) +
  geom_point(alpha = 6 / 10) +
  scale_size_continuous(range = c(2, 15)) +
  scale_x_continuous(labels = scales::dollar_format()) +
  scale_color_manual(values = mortal_palette) +
  labs(
    title = "COVID-19 Mortality Rate by Median Household Income",
    subtitle = "Michigan Counties, deaths per 1000",
    caption = print_sources,
    color = "Quintile",
    size = NULL
  ) +
  theme_fivethirtyeight() +
  # applied after the preset theme so the axis-title setting takes effect
  theme(axis.title = element_text())
# Save the static plot to the "Plots" folder.
# A ggplot is only rendered when it is printed, and the JPEG file is only
# finalised once its graphics device is closed. Print explicitly (top-level
# autoprint does not happen under source()) and close the device afterwards
# so the file is flushed and no device is left open.
jpeg(
  filename = "~/STA 518/BrookemWalters-Portfolio/Stats 518 Final Project/Plots/income_sp.jpeg",
  width = 1000, height = 650
)
print(income_sp)
invisible(dev.off())

# Display the interactive version
ia_income_sp <- ggplotly(income_sp)
## Warning: plotly.js does not (yet) support horizontal legend items
## You can track progress here:
## https://github.com/plotly/plotly.js/issues/53
ia_income_sp