# Strip leading/trailing whitespace from the county names ahead of the join
vax_rates3$county <- str_trim(vax_rates3$county, side = "both")
flu_rates_df$county <- str_trim(flu_rates_df$county, side = "both")

# Combine the two data sets, keeping only county / age-category pairs that
# are present in both
joined_data <- inner_join(
  vax_rates3,
  flu_rates_df,
  by = c("county", "age_cat")
)

# Flu infection rate = new infections as a percentage of total population,
# so it is on the same scale as the COVID rates
joined_data2 <- mutate(
  joined_data,
  flu_infection_rate = new_infections / total_pop * 100
)

# Keep only the analysis variables and give the COVID / flu columns
# descriptive names in the same step
joined_data3 <- joined_data2 %>%
  select(
    county,
    age_cat,
    total_pop,
    total_covid_vax = total_full_vax,
    covid_vax_rate = vax_rate,
    cumulative_flu_infections = new_infections,
    flu_infection_rate
  )

# Remove outliers (flu infection rate of 100% or more) and round both rate
# columns to two decimal places
joined_data4 <- joined_data3 %>%
  filter(flu_infection_rate < 100) %>%
  mutate(across(c(flu_infection_rate, covid_vax_rate), ~ round(.x, 2)))
# Graph 1: scatterplot of flu infection rate against COVID vaccination rate.
# Points are coloured by county; the legend is hidden.
ggplot(
  joined_data4,
  aes(covid_vax_rate, flu_infection_rate, label = county)
) +
  geom_point(aes(color = county), size = 1) +
  geom_smooth() +
  labs(
    title = "COVID-19 Vaccination Rates vs. Flu Infection Rates",
    subtitle = "All Counties in California",
    x = "COVID Vaccination Rate (%)",
    y = "Flu Infection Rate (%)",
    color = "County"
  ) +
  theme(legend.position = "none")
Graph 1: Shows the flu infection rate plotted against the COVID
vaccination rate, with points colored by county. The graph suggests a
negative correlation: higher COVID vaccination rates are associated with
lower flu infection rates.
# Graph 2: scatterplot of flu infection rate against COVID vaccination rate,
# with points coloured by age category
ggplot(joined_data4, aes(covid_vax_rate, flu_infection_rate)) +
  geom_point(aes(color = age_cat)) +
  geom_smooth() +
  labs(
    title = "COVID-19 Vaccination Rates vs. Flu Infection Rate by Age",
    subtitle = "All Counties in California",
    x = "COVID Vaccination Rate (%)",
    y = "Flu Infection Rate (%)",
    color = "Age Category"
  )
Graph 2: This graph shows flu infection rates plotted against COVID
vaccination rates for the four age categories within each county. Adults
(ages 18 and older) tended to have higher COVID vaccination rates than the
0-17 age group, and the 0-17 age group experienced the highest flu
infection rates along with the lowest vaccination rates. In general, as
age category increases, the COVID vaccination rate tends to rise and the
flu infection rate tends to fall.
# Build a county-level data frame: one row per county holding the summed
# population, COVID vaccinations and flu infections (NAs are skipped).
# NOTE(review): this aggregates joined_data3, i.e. the rows *before* the
# flu_infection_rate < 100 outlier filter that produces joined_data4 --
# confirm that including those rows here is intended.
county_data <- joined_data3 %>%
  group_by(county) %>%
  summarize(
    total_pop = sum(total_pop, na.rm = TRUE),
    total_covid_vax = sum(total_covid_vax, na.rm = TRUE),
    cumulative_flu_infections = sum(cumulative_flu_infections, na.rm = TRUE)
  )

# Add county-level rates, each expressed as a percentage of the county's
# summed population
county_data2 <- county_data %>%
  mutate(
    flu_infection_rate = (cumulative_flu_infections / total_pop) * 100,
    covid_vaccination_rate = (total_covid_vax / total_pop) * 100
  )
# Graph 3: county-level scatterplot of flu infection rate against COVID
# vaccination rate, with a smoothed trend line and each point labelled with
# its county name.
scatterplot1 <- ggplot(
  county_data2,
  mapping = aes(x = covid_vaccination_rate, y = flu_infection_rate)
) +
  geom_point() +
  geom_smooth() +
  # Map the label from the plot data via aes() rather than passing
  # county_data2$county as an external vector, so the labels always stay
  # aligned with the rows the layer actually draws.
  geom_label(aes(label = county)) +
  labs(
    title = "Flu Infection Rate by COVID-19 Vaccination Rate per County",
    x = "COVID Vaccination Rate (%) ",
    y = "Flu Infection Rate (%) "
  )
scatterplot1
Graph 3: This scatterplot depicts flu infection rates plotted against
COVID vaccination rates across all California counties. It shows a
negative correlation: counties with higher COVID vaccination rates tend
to have lower flu infection rates.
Table 1: This table includes the data for every county, broken down by the age categories 0-17, 18-49, 50-64, and 65+. It includes the total population, the COVID vaccination rate per 100 persons, the total flu infections, and the flu infection rate per 100 persons.