Quick prototyping using multiple tools on the same dataset to produce some basic chart types. “Minimally processed” with each tool’s default backgrounds retained.
# COVID-19 Vaccination Demographics in the United States,National, Updated February 2, 2022
# data.cdc.gov/Vaccinations/COVID-19-Vaccination-Demographics-in-the-United-St/km4m-vcsb
covid <- read.csv("COVID-19_Vaccination_Demographics_in_the_United_States_National.csv",
stringsAsFactors = F,
header = T,
sep = '\t')
# str(covid)
# dim(covid)
start_dt_str <- "2021-10-31"
end_dt_str <- "2022-01-31"
covid <- covid %>%
mutate(
Date = as.Date(Date, format = '%m/%d/%Y')
, Administered_Dose1 = parse_number(Administered_Dose1)
, Series_Complete_Yes = parse_number(Series_Complete_Yes)
, Booster_Doses_Yes = parse_number(Booster_Doses_Yes)
, Booster_Doses_Yes_Last14Days = parse_number(Booster_Doses_Yes_Last14Days)
, Segment = case_when(
Demographic_category %in% c("Age_known", "Ages_<5yrs", "Ages_5-11_yrs", "Ages_12-15_yrs", "Ages_16-17_yrs", "Sex_known","Race_eth_known") ~ "Remove"
, Demographic_category == "US" ~ "Total"
, str_starts(Demographic_category, "Sex_") ~ "Gender"
, str_starts(Demographic_category, "Age") ~ "Age"
, TRUE ~ "Other"
)
, Monthend = case_when(
Date == ceiling_date(Date, "month") - days(1) ~ format(Date, "%b %Y")
, TRUE ~ ""
)
, Periods = case_when(
Date == as.Date(start_dt_str) ~ "Start"
, Date == as.Date(end_dt_str) ~ "End"
, TRUE ~ ""
)
,Demographic_category = str_replace(Demographic_category, "Age_", "Ages_")
, Age = case_when(
Segment == "Age" ~ str_replace(Demographic_category, "Ages_"," ")
, TRUE ~ ""
)
, Gender = case_when(
Segment == "Gender" ~ str_replace(Demographic_category, "Sex_","")
, TRUE ~ ""
)
) %>%
filter(Date >= as.Date(start_dt_str) &
Date <= as.Date(end_dt_str))
covid_last <- covid %>%
filter(Periods == "End") %>%
select('Demographic_category', 'Date', 'Series_Complete_Yes') %>%
rename(Month_Last = Date,
Series_Complete_Last = Series_Complete_Yes)
covid_first <- covid %>%
filter(Periods == "Start") %>%
select('Demographic_category', 'Date', 'Series_Complete_Yes') %>%
rename(Month_Start = Date,
Series_Complete_Start = Series_Complete_Yes)
covid_last <- covid %>%
left_join(covid_first) %>%
left_join(covid_last) %>%
mutate(
Pct_Chg_Start = (Series_Complete_Yes - Series_Complete_Start) / Series_Complete_Start
, Pct_of_Last = Series_Complete_Yes / Series_Complete_Last
, Gender = case_when(
Demographic_category == "US" ~ "ALL"
, TRUE ~ Gender)
, Age = case_when(
Demographic_category == "US" ~ "ALL"
, TRUE ~ Age)
) %>%
mutate(
Age = fct_relevel(Age, 'ALL', ' <12yrs',' 12-17_yrs',' 18-24_yrs',' 25-39_yrs',' 40-49_yrs',' 50-64_yrs',' 65-74_yrs',' 75+_yrs',' unknown')
, Gender = fct_relevel(Gender, 'ALL', 'Female','Male','unknown')
)
## Joining, by = "Demographic_category"
## Joining, by = "Demographic_category"
# writexl::write_xlsx(covid_last,"covid_last_raw.xlsx")
# save(covid_last, file = "covid_last.RData")
rm(covid_first, covid)
# str(covid_last)
# dim(covid_last)
# chart 1 US Covid 19 Vaccine Series Completions
# % Change from ME Oct 2021, as of ME Jan 2022
covid_last %>%
filter(
(Periods == "End") &
(str_length(Gender) > 1)) %>%
group_by(Gender) %>%
ggplot(aes(x=Gender, y=Pct_Chg_Start, fill=Gender, label=(
paste0(format(Pct_Chg_Start * 100, digits=2)," %")))) +
geom_col(show.legend=F) +
geom_text(size = 3, position = position_stack(vjust = 0.8)) +
labs(title = "US Covid 19 Vaccine Series Completions",
subtitle = "Percentage increase of completions in 3 months was about 9 percent.",
x = "% Change from monthend Oct 2021, as of monthend Jan 2022",
y = "",
caption = "(Demographics based on data from the CDC, 2/2/2022)") +
theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank()
)
# chart 2 US Covid 19 Vaccine Series Completions
# % Change from ME Oct 2021, thru ME Jan 2022
end_values <- covid_last %>%
filter(
(Periods == "End") &
(Demographic_category == "US"))
covid_last %>%
filter(
(Demographic_category == "US")) %>%
arrange(Date) %>%
group_by(Date) %>%
ggplot(aes(x=Date, y=Pct_Chg_Start, label=(
paste0(format(Pct_Chg_Start * 100, digits=2)," %")))) +
geom_line(col="blue", show.legend=F) +
# geom_text(size = 3, position = position_stack(vjust = 1)) +
labs(title = "US Covid 19 Vaccine Series Completions",
subtitle = "Percentage increase of completions in 3 months was about 9 percent.",
x = "% Change from monthend Oct 2021, as of monthend Jan 2022",
y = "",
caption = "(Demographics based on data from the CDC, 2/2/2022)") +
theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank()
) +
scale_x_date(labels = scales::date_format("%m-%Y")) +
ggrepel::geom_text_repel(
aes(label=(
paste0(format(Pct_Chg_Start * 100, digits=2)," %"))), data = end_values,
fontface ="plain", color = "black", size = 3, hjust=-2, vjust=1.2
) +
geom_vline(xintercept=c(as.Date("2021-11-02")), linetype='dashed', color=c('red')) +
annotate("text", x = as.Date("2021-11-25"), y = 0.075, col="red", size =3, label = "Nov 2 - CDC endorses vaccines for ages 5 - 11\n Nov 29 - CDC recommends booster shots for 18+")