The purpose of this script is to run descriptive statistics for various demographics variables, impediments, and best treatments, on the full pilot data.
Insert Description of Full Pilot Here
Cleaning and Filtering
The data used in this script has already been cleaned in the data cleaning script.
This descriptive statistics script will use the version 8 data, filtered to completes and randomized only. Also, observation with missing vax status are dropped. There are 4,349 observations.
One additional cleaning step in this analysis remove observations for which vaccination status is “other” (1 observation for v7). We also run the factor_cols.R file, which provides order to variables (ie. location: urban > suburban > rural > other > prefer not to say), since these orderings are not saved in the csv files.
Starred variables have a corresponding variable, with the suffix _num (ie education_num) that provides a numeric ordering of the variables. This order is listed in parenthesis.
| Variable | Description |
|---|---|
| vax_status* | unvax (0) or unvax (1) |
| gender* | male (0) or female (1) |
| age | integer 18 to 99 |
| education* | < high school (1), high school (2), some college (3), 2-year degree (4), 4-year degree (5), graduate degree (6), other, prefer not to say |
| location* | rural(1), suburban (2), urban(3), other, prefer not to say |
| religiosity* | not very religious (1), somewhat religious (2), very religious (3), other, prefer not to say |
| black | 1 if the participant is a black or african, 0 if not |
| ethnicity | asian or indian, black or african, white or caucasian, other, prefer not to say |
| Variable | Description |
|---|---|
| motive_main | benefit, risk, other |
| belief_main | if motive_main == belief: freedom to choose, religious reasons, don’t trust gov/org, other reason(s), other |
| benefit_main | if motive_main == benefit: covid not dangerous, had covid already, unlikely to get sick, vaccines don’t work, other reason(s) |
| risk_main | if motive_main == risk: bad side effects, don’t trust pharma, needles/pain, not enough testing, vaccines don’t work, other reason(s), other |
| ability_main | availability, money, time, other |
| availability_main | if ability_main == availability: no vaccines left, too far away, other reason(s) |
| money_main | if ability_main == money: no cash, no insurance, travel costs, other reason(s) |
| time_main | if ability_main == time: childcare, getting off worke, no time to research, other reason(s), other |
| Variable | Description |
|---|---|
| best_treatment | family supports it, rewards for vaxxing, trusted info source, something else, nothering, other |
df %>%
select_if(is.numeric) %>%
select(vax_status = vax_status_num, age, education = education_num, location=location_num, religiosity = religiosity_num, gender = gender_num, black) %>%
describe(quant = c(.25,.75) ) %>%
select(n, mean, sd, se, min, min, first_quartile = Q0.25, median, third_quartile = Q0.75, max) %>%
clean_names(case = "title") %>%
rename(SD = Sd, SE = Se) %>%
kable(caption = "Summary Statistics for Continuous Demographic Variables", digits = 3)%>%
kable_styling(bootstrap_options = c("striped", "hover"))
| N | Mean | SD | SE | Min | First Quartile | Median | Third Quartile | Max | |
|---|---|---|---|---|---|---|---|---|---|
| vax_status | 4349 | 0.689 | 0.463 | 0.007 | 0 | 0 | 1 | 1 | 1 |
| age | 4299 | 25.890 | 7.269 | 0.111 | 18 | 21 | 24 | 28 | 99 |
| education | 4107 | 3.200 | 1.622 | 0.025 | 1 | 2 | 3 | 5 | 6 |
| location | 4133 | 2.147 | 0.834 | 0.013 | 1 | 1 | 2 | 3 | 3 |
| religiosity | 3868 | 2.481 | 0.681 | 0.011 | 1 | 2 | 3 | 3 | 3 |
| gender | 4334 | 0.345 | 0.476 | 0.007 | 0 | 0 | 0 | 1 | 1 |
| black | 4347 | 0.926 | 0.262 | 0.004 | 0 | 1 | 1 | 1 | 1 |
df %>%
select(vax_status, age, education = education_num, location=location_num, religiosity = religiosity_num, gender = gender_num, black) %>%
split(.$vax_status) %>%
map(describe) %>%
do.call(rbind, .) %>%
data.frame() %>%
select(n, mean , se) %>%
rownames_to_column() %>%
separate(rowname, c("vax_status", "variable"), sep = "\\.") %>%
filter(variable != "vax_status*") %>%
pivot_longer(cols = c("n", "mean", "se")) %>%
mutate(column = paste(vax_status, name, sep = "_")) %>%
select(column, variable, value) %>%
pivot_wider(names_from = column) %>%
kable(digits = 3,caption = "Summary Statistics for Continuous Demographic Variables by Vaccination Status",
col.names = c("", "N", "Mean", "SE", "N", "Mean", "SE")) %>%
kable_styling(bootstrap_options = c("striped", "hover"))%>%
add_header_above(c(" " = 1, "Unvaccinated" = 3, "Vaccinated" = 3))
| N | Mean | SE | N | Mean | SE | |
|---|---|---|---|---|---|---|
| age | 1328 | 25.149 | 0.198 | 2971 | 26.221 | 0.133 |
| education | 1260 | 3.202 | 0.045 | 2847 | 3.198 | 0.031 |
| location | 1280 | 2.172 | 0.023 | 2853 | 2.135 | 0.016 |
| religiosity | 1174 | 2.434 | 0.021 | 2694 | 2.501 | 0.013 |
| gender | 1349 | 0.361 | 0.013 | 2985 | 0.338 | 0.009 |
| black | 1353 | 0.919 | 0.007 | 2994 | 0.929 | 0.005 |
df %>%
select(gender, ethnicity, education, religion, location) %>%
summarize_factor() %>%
data.frame() %>%
rename(variable = "X.", Percent = "X..2") %>%
select(!X..1) %>%
mutate(variable = ifelse(variable == "", NA, variable)) %>%
fill(data = ., variable) %>%
arrange(variable) %>%
kable(digits = 3,caption = "Summary Statistics for Factor Demographic Variables",
col.names = c("", "Value", "N", "Percent")) %>%
kable_styling(bootstrap_options = c("striped", "hover"))%>%
collapse_rows() %>%
scroll_box( height = "500px")
| Value | N | Percent | |
|---|---|---|---|
| education | < high school | 667 | 15.3 |
| high school | 910 | 20.9 | |
| some college | 1097 | 25.2 | |
| 2-year degree | 333 | 7.7 | |
| 4-year degree | 569 | 13.1 | |
| graduate degree | 531 | 12.2 | |
| other | 94 | 2.2 | |
| prefer not to say | 148 | 3.4 | |
| ethnicity | asian or indian | 14 | 0.3 |
| black or african | 4025 | 92.6 | |
| white or caucasian | 49 | 1.1 | |
| other | 167 | 3.8 | |
| prefer not to say | 92 | 2.1 | |
| <Missing> | 2 | 0.0 | |
| gender | female | 1497 | 34.4 |
| male | 2837 | 65.2 | |
| <Missing> | 15 | 0.3 | |
| location | urban | 1785 | 41.0 |
| suburban | 1169 | 26.9 | |
| rural | 1179 | 27.1 | |
| other | 34 | 0.8 | |
| prefer not to say | 181 | 4.2 | |
| <Missing> | 1 | 0.0 | |
| religion | african traditional | 88 | 2.0 |
| christian | 3623 | 83.3 | |
| hinduism | 7 | 0.2 | |
| islam | 462 | 10.6 | |
| judaism | 4 | 0.1 | |
| no religion | 53 | 1.2 | |
| other | 36 | 0.8 | |
| prefer not to say | 75 | 1.7 | |
| <Missing> | 1 | 0.0 |
df %>%
select(vax_status, gender, ethnicity, education, religion, location) %>%
split(.$vax_status) %>%
map(summarize_factor) %>%
do.call(rbind, .) %>%
data.frame() %>%
rownames_to_column("vax_status") %>%
rename(variable= "X.", Percent = "X..2") %>%
select(!X..1) %>%
mutate(variable = ifelse(variable == "", NA, variable)) %>%
fill(data = ., variable) %>%
mutate(vax_status = str_extract(vax_status, "unvax|vax"),
Percent= as.numeric(Percent)) %>%
pivot_longer(cols = c("N", "Percent")) %>%
mutate(column = paste(vax_status, name, sep = "_")) %>%
select(column, variable, Level, value) %>%
pivot_wider(names_from = "column") %>%
as.data.frame() %>%
arrange(variable) %>%
kable(digits = 3,caption = "Summary Statistics for Factor Demographic Variables by Vaccination Status",
col.names = c("", "Value", "N", "Percent", "N", "Percent")) %>%
kable_styling(bootstrap_options = c("striped", "hover"))%>%
add_header_above(c(" " = 2, "Unvaccinated" = 2, "Vaccinated" = 2))%>%
collapse_rows() %>%
scroll_box( height = "500px")
| Value | N | Percent | N | Percent | |
|---|---|---|---|---|---|
| education | < high school | 195 | 14.4 | 472 | 15.8 |
| high school | 298 | 22.0 | 612 | 20.4 | |
| some college | 318 | 23.5 | 779 | 26.0 | |
| 2-year degree | 106 | 7.8 | 227 | 7.6 | |
| 4-year degree | 192 | 14.2 | 377 | 12.6 | |
| graduate degree | 151 | 11.2 | 380 | 12.7 | |
| other | 32 | 2.4 | 62 | 2.1 | |
| prefer not to say | 62 | 4.6 | 86 | 2.9 | |
| ethnicity | asian or indian | 3 | 0.2 | 11 | 0.4 |
| black or african | 1244 | 91.9 | 2781 | 92.9 | |
| white or caucasian | 17 | 1.3 | 32 | 1.1 | |
| other | 52 | 3.8 | 115 | 3.8 | |
| prefer not to say | 37 | 2.7 | 55 | 1.8 | |
| <Missing> | 1 | 0.1 | 1 | 0.0 | |
| gender | female | 487 | 36.0 | 1010 | 33.7 |
| male | 862 | 63.7 | 1975 | 65.9 | |
| <Missing> | 5 | 0.4 | 10 | 0.3 | |
| location | urban | 555 | 41.0 | 1230 | 41.1 |
| suburban | 390 | 28.8 | 779 | 26.0 | |
| rural | 335 | 24.7 | 844 | 28.2 | |
| other | 10 | 0.7 | 24 | 0.8 | |
| prefer not to say | 64 | 4.7 | 117 | 3.9 | |
| <Missing> | NA | NA | 1 | 0.0 | |
| religion | african traditional | 28 | 2.1 | 60 | 2.0 |
| christian | 1128 | 83.3 | 2495 | 83.3 | |
| hinduism | 1 | 0.1 | 6 | 0.2 | |
| islam | 139 | 10.3 | 323 | 10.8 | |
| judaism | 0 | 0.0 | 4 | 0.1 | |
| no religion | 17 | 1.3 | 36 | 1.2 | |
| other | 12 | 0.9 | 24 | 0.8 | |
| prefer not to say | 29 | 2.1 | 46 | 1.5 | |
| <Missing> | NA | NA | 1 | 0.0 |
The following tabs show the correlations between demographics, and between demographics and motivation and ability impediments.
numeric_cols <- c("gender_num", "education_num",
"religiosity_num", "location_num",
"black", "vax_status_num"
)
df_numeric_demo <- df %>%
select(all_of(numeric_cols))
colnames(df_numeric_demo) <-str_to_title(str_replace(str_remove(colnames(df_numeric_demo), "_num"), "_", " "))
ggcorrplot(cor(df_numeric_demo, use = "pairwise.complete.obs"), type = "lower", lab = TRUE, lab_size = 12/.pt, tl.cex = 10, colors= c(cb_colors[1], "white", cb_colors[2])) +
custom_theme +
labs(y = "", x = "", title = "Correlation Matrix: \nFor Demographic Variables",
caption = paste("Number of Observations:", scales::comma(nrow(df_numeric_demo))))+
theme(axis.text.x = element_text(angle = 45, hjust=1))
other_data <- df %>%
select(ethnicity, religiosity, location, education) %>%
filter(!is.na(ethnicity) & !is.na(religiosity) & !is.na(location) & !is.na(education)) %>%
mutate(ethn_o = ifelse(ethnicity == "other", 1, 0),
relig_o = ifelse(religiosity == "other", 1, 0),
loc_o = ifelse(location == "other", 1, 0),
edu_o = ifelse(education == "other", 1, 0)) %>%
select(ethnicity = ethn_o, religiosity = relig_o, location = loc_o, education = edu_o)
ggcorrplot(cor(other_data, use = "pairwise.complete.obs"), type = "lower", lab = TRUE, lab_size = 12/.pt, tl.cex = 10, colors= c(cb_colors[1], "white", cb_colors[2])) +
custom_theme +
labs(y = "", x = "", title = "Correlation Matrix: \nFor Demographic Variables (Other)",
caption = paste("Number of Observations:", scales::comma(nrow(df_numeric_demo))))+
theme(axis.text.x = element_text(angle = 45, hjust=1))
risky <- df %>%
mutate(
risky = if_else(motive_main == "risky", 1L, 0L)) %>%
select(risky, gender_num, education_num, religiosity_num, location_num, black) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:6]
against_beliefs <- df %>%
mutate(
against_beliefs = if_else(motive_main == "beliefs", 1L, 0L)) %>%
select(against_beliefs, gender_num, education_num, religiosity_num, location_num, black) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:6]
no_benefits <- df %>%
mutate(
no_benefits = if_else(motive_main == "benefit", 1L, 0L)) %>%
select(no_benefits, gender_num, education_num, religiosity_num, location_num, black) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:6]
final_mat <-
tibble(
against_beliefs,
no_benefits, risky
) %>%
as.matrix()
rownames(final_mat) <- c("gender","education", "religiosity", "location", "black")
colnames(final_mat) <- c( "against_beliefs", "no_benefits", "risky")
ggcorrplot(final_mat, lab = TRUE, lab_size = 12/.pt, tl.cex = 10, colors= c(cb_colors[1], "white", cb_colors[2])) +
custom_theme +
labs(y = "", x = "", title = "Correlation Matrix: \nMotivation and Demographic Variables", caption =
paste("Number of Observations:", scales::comma(nrow(df))))+
theme(axis.text.x = element_text(angle = 45, hjust=1))
availability <- df %>%
mutate(
availability = if_else(ability_main == "availability", 1L, 0L)) %>%
select(availability, gender_num, education_num, religiosity_num, location_num, black) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:6]
money <- df %>%
mutate(
money = if_else(ability_main == "money", 1L, 0L)) %>%
select(money, gender_num, education_num, religiosity_num, location_num, black) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:6]
time <- df %>%
mutate(
time = if_else(ability_main == "time", 1L, 0L)) %>%
select(time, gender_num, education_num, religiosity_num, location_num, black) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:6]
final_mat <-
tibble(
availability,
money, time
) %>%
as.matrix()
rownames(final_mat) <- c("gender", "education", "religiosity","location", "black")
colnames(final_mat) <- c( "availability", "money", "time")
ggcorrplot(final_mat, lab = TRUE, lab_size = 12/.pt, tl.cex = 10, colors= c(cb_colors[1], "white", cb_colors[2])) +
custom_theme +
labs(y = "", x = "", title = "Correlation Matrix: \nAbility and Demographic Variables",
caption =
paste("Number of Observations:", scales::comma(nrow(df))))+
theme(axis.text.x = element_text(angle = 45, hjust=1))
Next, we show the number of participants in each of the eight forks, and the percentage of participants at the vaccination status level.
If a participant responds to motive with unsure or no, they are assigned to “no motive”. If a participant responds to ability with somewhat hard or really hard, they are assigned to “no ability”.
df %>%
group_by(vax_status, ability, motive) %>%
mutate(ability = str_to_title(ifelse(ability == "easy", "ability", "no ability")),
motive = str_to_title(ifelse(motive == "yes", "motive", "no motive"))) %>%
count() %>%
drop_na(ability, motive) %>%
ungroup() %>%
group_by(vax_status) %>%
mutate(`% of vax status` = round(100 * n/sum(n, na.rm = T), 2),
vax_status = str_to_title(ifelse(vax_status == "vax", "Vaccinated", "Unvaccinated"))) %>%
ungroup() %>%
arrange(vax_status, ability, motive) %>%
kable(col.names =
c("", "", "", "Count", "Percentage within Vaccination Status"),
align = "c",
caption = "Distribution of forking segments of impediments") %>%
kable_styling()
| Count | Percentage within Vaccination Status | |||
|---|---|---|---|---|
| Unvaccinated | Ability | Motive | 272 | 20.10 |
| Unvaccinated | Ability | No Motive | 443 | 32.74 |
| Unvaccinated | No Ability | Motive | 259 | 19.14 |
| Unvaccinated | No Ability | No Motive | 379 | 28.01 |
| Vaccinated | Ability | Motive | 1390 | 51.41 |
| Vaccinated | Ability | No Motive | 610 | 22.56 |
| Vaccinated | No Ability | Motive | 461 | 17.05 |
| Vaccinated | No Ability | No Motive | 243 | 8.99 |
stacked_bar <- function(df, variable,filter_phrase, fill_label, title_label, caption_label){
variable <- enquo(variable)
df_plot <- df %>%
filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
select(!!variable, vax_status, vax_status_num) %>%
group_by(!!variable, vax_status, vax_status_num) %>%
count() %>%
group_by(vax_status_num, vax_status) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent))
ggplot(df_plot, aes( x = vax_status_num, y = n, fill =as.factor(!!variable)))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_x_continuous(breaks = c(0, 1), labels = c("Unvaccinated", "Vaccinated"))+
scale_fill_manual(values = c(cb_colors, "red", "green", "blue", "yellow"), labels = str_to_title)+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill= fill_label,
title = title_label,
caption = paste("Number of Observations:",scales::comma(sum(df_plot$n)) , "\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%"))
}
stacked_bar2 <- function(df, variable,filter_phrase, fill_label, title_label, caption_label){
variable <- enquo(variable)
df_plot <- df %>%
filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
select(!!variable, vax_status, vax_status_num) %>%
group_by(!!variable, vax_status, vax_status_num) %>%
count() %>%
group_by(vax_status_num, vax_status) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))))
ggplot(df_plot, aes( x = vax_status_num, y = n, fill =as.factor(!!variable)))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_x_continuous(breaks = c(0, 1), labels = c("Unvaccinated", "Vaccinated"), limits = c(NA, 2.4))+
scale_fill_manual(values = c(cb_colors, "red", "green", "blue", "yellow"), labels = str_to_title)+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill= fill_label,
title = title_label,
caption = paste("Number of Observations: ", scales::comma(sum(df_plot$n)),"\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% filter(vax_status == "vax") , aes(x = vax_status_num+0.3, y = p- 0.01,
label = str_to_title(!!variable), color = !!variable),
nudge_x = 0.15, direction = "y", hjust = "left", size = 16/.pt) +
scale_color_manual(values = c(cb_colors, "red", "green", "blue", "yellow")) +
theme(legend.position = "none")
}
First, we show the percentage of participants by vaccination status.
df_plot <- df %>%
select(vax_status) %>%
group_by(vax_status) %>%
count() %>%
ungroup() %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""))
ggplot(df_plot, aes( x = 1, y = n, fill = vax_status))+
geom_bar(position = "fill", stat ="identity", width = .7, color = "white") +
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
coord_flip() +
custom_theme +
scale_x_continuous(breaks = c(1), limits = c(0.6, 1.8)) +
scale_fill_manual(values = c(cb_colors[3], cb_colors[6]), labels = c("Unvax", "Vax")) +
labs(x = "", y ="", title = "Percentage of Participants by Vaccination Status",
caption = paste("Number of Observations:", scales::comma(sum(df_plot$n))))+
geom_label(aes(x = 1.6, y = .29), label = "Vaccinated", size = 16/.pt, label.size = NA, fill = "white", color = cb_colors[6])+
geom_label(aes(x = 1.6, y = 0.79), label = "Unvaccinated", label.size = NA, fill = "white", color = cb_colors[3], size = 16/.pt) +
theme(legend.position = "none",
axis.text.y = element_blank()) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%"))
This section visualizes various demographic variables by vaccination status.
df %>%
stacked_bar(variable = gender,
filter_phrase = NULL,
fill_label = "Gender",
title_label= "Share of Participants by Gender\nand Vaccination Status",
caption_label= "")
df_plot <- df %>%
select(age, vax_status)
ggplot(df_plot, aes(x = age, group = vax_status, fill = vax_status)) +
geom_density(alpha = 0.5) +
custom_theme +
scale_x_continuous(limits = c(18, 100), breaks = seq(0, 100, 10))+
scale_fill_manual(values = c(cb_colors[3], cb_colors[6]))+
labs(x = "Age", y = "Density", fill= "Vax Status", caption = paste("Number of Observations:", nrow(df_plot)),
title = paste("Age Distribution by <span style='color:", cb_colors[6], ";'>Vaccinated</span> and <span style='color:", cb_colors[3], ";'>Unvaccinated</span>", sep = "")) +
theme(legend.position = "none",
plot.title = element_markdown(size = 20))
df %>%
stacked_bar(variable = education,
filter_phrase = NULL,
fill_label = "Education",
title_label= "Share of Participants by Education\nand Vaccination Status",
caption_label= "")
df %>%
stacked_bar(variable = religiosity,
filter_phrase = NULL,
fill_label = "Religiosity",
title_label= "Share of Participants by Religiosity\nand Vaccination Status",
caption_label= "")
df %>%
stacked_bar(variable = religion,
filter_phrase = NULL,
fill_label = "Religion",
title_label= "Share of Participants by Religion\nand Vaccination Status",
caption_label= "")
df %>%
stacked_bar(variable = location,
filter_phrase = NULL,
fill_label = "Location",
title_label= "Share of Participants by Location\nand Vaccination Status",
caption_label= "")
df %>%
stacked_bar(variable = ethnicity,
filter_phrase = NULL,
fill_label = "Ethnicity",
title_label= "Share of Participants by Ethnicity\nand Vaccination Status",
caption_label= "")
This section visualizes various demographic variables by motivation impediments. The data used in this section are limited to participants who indicated a motive impediment.
df %>%
stacked_bar2(variable = motive_main,
filter_phrase = "motive %in% c('no', 'unsure')",
fill_label = "",
title_label= "Share of Participants by Motive\nand Vaccination Status",
caption_label= "")
For participants who identified their main motive impediment as risk.
df %>%
stacked_bar2(variable = risk_main,
filter_phrase = "motive %in% c('no', 'unsure') & motive_main == 'risky'",
fill_label = "",
title_label= "Share of Participants by Risk\nand Vaccination Status",
caption_label= "")
For participants who identified their main motive impediment as benefit.
df %>%
stacked_bar2(variable = benefit_main,
filter_phrase = "motive %in% c('no', 'unsure') & motive_main == 'benefit'",
fill_label = "",
title_label= "Share of Participants by Benefit\nand Vaccination Status",
caption_label= "")
For participants who identified their main motive impediment as belief.
df %>%
stacked_bar2(variable = belief_main,
filter_phrase = "motive %in% c('no', 'unsure') & motive_main == 'beliefs'",
fill_label = "",
title_label= "Share of Participants by Belief\nand Vaccination Status",
caption_label= "")
This section visualizes various demographic variables by ability impediments. The data used in this section are limited to participants who indicated a ability impediment.
df %>%
stacked_bar2(variable = ability_main,
filter_phrase = "ability %in% c('somewhat hard', 'really hard')",
fill_label = "",
title_label= "Share of Participants by Ability\nand Vaccination Status",
caption_label= "")
For participants who identified their main ability impediment as availability.
df %>%
stacked_bar2(variable = availability_main,
filter_phrase = "ability %in% c('somewhat hard', 'really hard') & ability_main == 'availability'",
fill_label = "",
title_label= "Share of Participants by Availability\nand Vaccination Status",
caption_label= "")
For participants who identified their main ability impediment as money.
df %>%
stacked_bar2(variable = money_main,
filter_phrase = "ability %in% c('somewhat hard', 'really hard') & ability_main == 'money'",
fill_label = "",
title_label= "Share of Participants by Money\nand Vaccination Status",
caption_label= "")
For participants who identified their main ability impediment as time.
df %>%
stacked_bar2(variable = time_main,
filter_phrase = "ability %in% c('somewhat hard', 'really hard') & ability_main == 'time'",
fill_label = "",
title_label= "Share of Participants by Time\nand Vaccination Status",
caption_label= "")
This figure shows the percentage of participants by best treatment.
Other Best Treatments refers to all other best_treatment options, including: appointments, family/friend endorses it, new trusted info, reminders, rewards for vaxxing, time off work, nothing, no, something else, or missing (NA) response.
Other refers to free text inputs.
filter_phrase = NULL
fill_label = "Best Treatment"
title_label= "Percentage of Participants by Best Treatment\nand Vaccination Status"
caption_label= ""
df_plot <- df %>%
filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
select(best_treatment, vax_status, vax_status_num) %>%
group_by(best_treatment, vax_status, vax_status_num) %>%
count() %>%
group_by(vax_status_num, vax_status) %>%
mutate(prop = n/sum(n)) %>%
group_by(best_treatment) %>%
mutate(flag_big = ifelse(max(prop) > 1/11, 1, 0)) %>%
ungroup() %>%
mutate(best_treatment = ifelse(flag_big == 0, "Other Best Treatment", as.character(best_treatment))) %>%
select(!flag_big) %>%
group_by(best_treatment, vax_status, vax_status_num) %>%
dplyr::summarize(n = sum(n), prop = sum(prop)) %>%
ungroup() %>%
mutate(percent = paste(round(prop * 100, 0), "%", sep = ""),
#percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))))
ggplot(df_plot, aes( x = vax_status_num, y = n, fill =as.factor(best_treatment)))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_x_continuous(breaks = c(0, 1), labels = c("Unvaccinated", "Vaccinated"), limits = c(NA, 3))+
scale_fill_manual(values = c(cb_colors, "red", "green", "blue", "yellow"), labels = str_to_title)+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill= fill_label,
title = title_label,
caption = paste("Number of Observations: ", scales::comma(sum(df_plot$n)),"\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% filter(vax_status == "vax") , aes(x = vax_status_num+0.3, y = p- 0.03,
label = str_to_title(best_treatment), color = best_treatment),
nudge_x = 0.2, direction = "y", hjust = "left", size = 16/.pt) +
scale_color_manual(values = c(cb_colors, "red", "green", "blue", "yellow")) +
theme(legend.position = "none")
This section visualizas demographics by best treatment, motivation impediments, and ability impediments for the unvaccinated participants.
The following figures show the best treatment by various demographics, for unvaccinated participants. Additionally, observations with missing entries for a best treatment are removed. Grey bars show the percentage of missing observations.
stacked_bar3 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df_plot <- df %>%
filter(vax_status == "unvax" & !is.na(best_treatment)) %>%
select(best_treatment, !!variable) %>%
group_by(!!variable, best_treatment) %>%
count() %>%
group_by(best_treatment) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n))))
) %>%
ungroup() %>%
mutate(best_treatment_num = as.numeric(factor(best_treatment)))
ggplot(df_plot, aes(x = fct_rev(best_treatment), y = n, fill =!!variable))+
geom_col(position = "fill", color = "white", width = .7) +
custom_theme +
coord_flip() +
scale_fill_manual(values = c(cb_colors, "red", "green", "blue", "yellow"), labels = str_to_title)+
# scale_x_continuous(limits = c(NA, 13), breaks = 1:12, labels =c(levels(factor(df$best_treatment)))) +
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = paste("Number of Observations:", scales::comma(sum(df_plot$n)), "\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
theme(legend.position = "bottom")
}
stacked_bar3(df = df,
variable = gender,
fill_label ="",
title_label = "Gender by Best Treatment", caption_label = "")
stacked_bar3(df = df,
variable = ethnicity,
fill_label ="",
title_label = "Ethnicity by Best Treatment", caption_label = "")
stacked_bar3(df = df,
variable = education,
fill_label ="",
title_label = "Education by Best Treatment", caption_label = "")
stacked_bar3(df = df,
variable = location,
fill_label ="",
title_label = "Location by Best Treatment", caption_label = "")
stacked_bar3(df = df,
variable = religiosity,
fill_label ="",
title_label = "Religiosity by Best Treatment", caption_label = "")
The following figures show the best treatment by various demographics, for the unvaccinated group. Additionally, observations with missing entries for a given demographic variable are removed. Grey bars show the percentage of missing observations.
stacked_bar4 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df_plot <- df %>%
filter(vax_status == "unvax" & !is.na(!!variable)) %>%
select(best_treatment, !!variable) %>%
group_by(!!variable, best_treatment) %>%
count() %>%
group_by(!!variable) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(prop * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))),
demo_num = as.numeric(!!variable))
ggplot(df_plot, aes( x = demo_num, y = n, fill =best_treatment))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = c(cb_colors, "red", "green", "blue", "yellow"), labels = str_to_title)+
scale_x_continuous(breaks = 1:length(levels(df %>% pull(!!variable))),
limits = c(NA, length(levels(df %>% pull(!!variable))) +2),
labels =str_replace_all(c(levels(df %>% pull(!!variable))), " ", "\n"))+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = paste("Number of Observations:", scales::comma(sum(df_plot$n)), "\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% ungroup() %>%filter(demo_num == max(demo_num)) ,
aes(x = demo_num+0.3, y = p-0.01,
label = str_to_title(best_treatment), color = best_treatment),
nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
scale_color_manual(values = c(cb_colors, "red", "green", "blue", "yellow")) +
theme(legend.position = "none")
}
df_plot <- df %>%
filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
select(best_treatment, gender) %>%
filter(!is.na(gender)) %>%
group_by(best_treatment, gender) %>%
count() %>%
group_by(gender) %>%
mutate(prop = n/sum(n)) %>%
group_by(best_treatment) %>%
mutate(flag_big = ifelse(max(prop) > 1/11, 1, 0)) %>%
ungroup() %>%
mutate(best_treatment = ifelse(flag_big == 0, "Other Best Treatment", as.character(best_treatment))) %>%
select(!flag_big) %>%
group_by(best_treatment, gender) %>%
dplyr::summarize(n = sum(n), prop = sum(prop)) %>%
ungroup() %>%
mutate(percent = paste(round(prop * 100, 0), "%", sep = ""),
percent = ifelse(round(prop * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))))
ggplot(df_plot, aes( x = as.numeric(gender), y = n, fill =as.factor(best_treatment)))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = c(cb_colors, "red", "green", "blue", "yellow"), labels = str_to_title)+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
scale_x_continuous(limits = c(NA, length(levels(df$gender)) + .6), breaks = 1:length(levels(df$gender)),
labels =str_replace_all(c(levels(df$gender)), " ", "\n")) +
labs(x = "", y = "", fill= fill_label,
title = title_label,
caption = paste("Number of Observations: ", scales::comma(sum(df_plot$n)),"\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
scale_color_manual(values = c(cb_colors, "red", "green", "blue", "yellow"))
df_plot <- df %>%
filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
select(best_treatment, ethnicity) %>%
filter(!is.na(ethnicity)) %>%
group_by(best_treatment, ethnicity) %>%
count() %>%
group_by(ethnicity) %>%
mutate(prop = n/sum(n)) %>%
group_by(best_treatment) %>%
mutate(flag_big = ifelse(max(prop) > 1/11, 1, 0)) %>%
ungroup() %>%
mutate(best_treatment = ifelse(flag_big == 0, "Other Best Treatment", as.character(best_treatment))) %>%
select(!flag_big) %>%
group_by(best_treatment, ethnicity) %>%
dplyr::summarize(n = sum(n), prop = sum(prop)) %>%
ungroup() %>%
mutate(percent = paste(round(prop * 100, 0), "%", sep = ""),
percent = ifelse(round(prop * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))))
ggplot(df_plot, aes( x = as.numeric(ethnicity), y = n, fill =as.factor(best_treatment)))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = c(cb_colors, "red", "green", "blue", "yellow"), labels = str_to_title)+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
scale_x_continuous(limits = c(NA, length(levels(df$ethnicity)) + .6), breaks = 1:length(levels(df$ethnicity)),
labels =str_replace_all(c(levels(df$ethnicity)), " ", "\n")) +
labs(x = "", y = "", fill= fill_label,
title = title_label,
caption = paste("Number of Observations: ", scales::comma(sum(df_plot$n)),"\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
scale_color_manual(values = c(cb_colors, "red", "green", "blue", "yellow"))
df_plot <- df %>%
filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
select(best_treatment, education) %>%
filter(!is.na(education)) %>%
group_by(best_treatment, education) %>%
count() %>%
group_by(education) %>%
mutate(prop = n/sum(n)) %>%
group_by(best_treatment) %>%
mutate(flag_big = ifelse(max(prop) > 1/11, 1, 0)) %>%
ungroup() %>%
mutate(best_treatment = ifelse(flag_big == 0, "Other Best Treatment", as.character(best_treatment))) %>%
select(!flag_big) %>%
group_by(best_treatment, education) %>%
dplyr::summarize(n = sum(n), prop = sum(prop)) %>%
ungroup() %>%
mutate(percent = paste(round(prop * 100, 0), "%", sep = ""),
percent = ifelse(round(prop * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))))
ggplot(df_plot, aes( x = as.numeric(education), y = n, fill =as.factor(best_treatment)))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = c(cb_colors, "red", "green", "blue", "yellow"), labels = str_to_title)+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
scale_x_continuous(limits = c(NA, length(levels(df$education)) + .6), breaks = 1:length(levels(df$education)),
labels =str_replace_all(c(levels(df$education)), " ", "\n")) +
labs(x = "", y = "", fill= fill_label,
title = title_label,
caption = paste("Number of Observations: ", scales::comma(sum(df_plot$n)),"\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
scale_color_manual(values = c(cb_colors, "red", "green", "blue", "yellow"))
df_plot <- df %>%
filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
select(best_treatment, religiosity) %>%
filter(!is.na(religiosity)) %>%
group_by(best_treatment, religiosity) %>%
count() %>%
group_by(religiosity) %>%
mutate(prop = n/sum(n)) %>%
group_by(best_treatment) %>%
mutate(flag_big = ifelse(max(prop) > 1/11, 1, 0)) %>%
ungroup() %>%
mutate(best_treatment = ifelse(flag_big == 0, "Other Best Treatment", as.character(best_treatment))) %>%
select(!flag_big) %>%
group_by(best_treatment, religiosity) %>%
dplyr::summarize(n = sum(n), prop = sum(prop)) %>%
ungroup() %>%
mutate(percent = paste(round(prop * 100, 0), "%", sep = ""),
percent = ifelse(round(prop * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))))
ggplot(df_plot, aes( x = as.numeric(religiosity), y = n, fill =as.factor(best_treatment)))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = c(cb_colors, "red", "green", "blue", "yellow"), labels = str_to_title)+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
scale_x_continuous(limits = c(NA, length(levels(df$religiosity)) + .6), breaks = 1:length(levels(df$religiosity)),
labels =str_replace_all(c(levels(df$religiosity)), " ", "\n")) +
labs(x = "", y = "", fill= fill_label,
title = title_label,
caption = paste("Number of Observations: ", scales::comma(sum(df_plot$n)),"\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
scale_color_manual(values = c(cb_colors, "red", "green", "blue", "yellow"))
df_plot <- df %>%
filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
select(best_treatment, location) %>%
filter(!is.na(location)) %>%
group_by(best_treatment, location) %>%
count() %>%
group_by(location) %>%
mutate(prop = n/sum(n)) %>%
group_by(best_treatment) %>%
mutate(flag_big = ifelse(max(prop) > 1/11, 1, 0)) %>%
ungroup() %>%
mutate(best_treatment = ifelse(flag_big == 0, "Other Best Treatment", as.character(best_treatment))) %>%
select(!flag_big) %>%
group_by(best_treatment, location) %>%
dplyr::summarize(n = sum(n), prop = sum(prop)) %>%
ungroup() %>%
mutate(percent = paste(round(prop * 100, 0), "%", sep = ""),
percent = ifelse(round(prop * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))))
ggplot(df_plot, aes( x = as.numeric(location), y = n, fill =as.factor(best_treatment)))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = c(cb_colors, "red", "green", "blue", "yellow"), labels = str_to_title)+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
scale_x_continuous(limits = c(NA, length(levels(df$location))+ .6), breaks = 1:length(levels(df$location)),
labels =str_replace_all(c(levels(df$location)), " ", "\n")) +
labs(x = "", y = "", fill= fill_label,
title = title_label,
caption = paste("Number of Observations: ", scales::comma(sum(df_plot$n)),"\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
scale_color_manual(values = c(cb_colors, "red", "green", "blue", "yellow"))
The following figures show the best treatment by various demographics, for unvaccinated participants. Additionally, observations with missing entries for a best treatment are removed. Grey bars show the percentage of missing observations.
stacked_bar6 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df_plot <- df %>%
filter(vax_status == "unvax" & !is.na(motive_main)) %>%
select(motive_main, !!variable) %>%
group_by(!!variable, motive_main) %>%
count() %>%
group_by(motive_main) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))),
motive_main_num = as.numeric(motive_main))
ggplot(df_plot, aes( x = motive_main_num, y = n, fill =!!variable))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = cb_colors, labels = str_to_title)+
scale_x_continuous(limits = c(NA, length(levels(df$motive_main)) + 2), breaks = 1:length(levels(df$motive_main)),
labels =str_replace_all(c(levels(df$motive_main)), " ", "\n")) +
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = paste("Number of Observations:", scales::comma(sum(df_plot$n)), "\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% filter(motive_main == "other") ,
aes(x = motive_main_num+0.3, y = p-0.01,
label = str_to_title(!!variable), color = !!variable),
nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
scale_color_manual(values = cb_colors) +
theme(legend.position = "none")
}
stacked_bar6(df = df, variable = gender, caption_label = "",
title_label = "Motive Impediment by Gender", fill_label = "")
stacked_bar6(df = df, variable = ethnicity, caption_label = "",
fill_label = "", title_label = "Motive Impediment by Ethnicity")
stacked_bar6(df = df, variable = education, caption_label = "",
fill_label = "", title_label = "Motive Impediment by Education")
stacked_bar6(df = df, variable = religiosity, caption_label = "",
fill_label = "", title_label = "Motive Impediment by Religiosity")
stacked_bar6(df = df, variable = location, caption_label = "",
fill_label = "", title_label = "Motive Impediment by Location")
Data used in this section is limited to unvaccinated participants who indicataed a motivation impediment.
stacked_bar5 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df_plot <- df %>%
filter(vax_status == "unvax" & !is.na(!!variable) & motive == "no") %>%
select(motive_main, !!variable) %>%
group_by(!!variable, motive_main) %>%
count() %>%
group_by(!!variable) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))),
demo_num = as.numeric(!!variable))
ggplot(df_plot, aes( x = demo_num, y = n, fill =motive_main))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = cb_colors, labels = str_to_title)+
scale_x_continuous(breaks = 1:length(levels(df %>% pull(!!variable))),
limits = c(NA, length(levels(df %>% pull(!!variable))) +2),
labels =str_replace_all(c(levels(df %>% pull(!!variable))), " ", "\n"))+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = paste("Number of Observations:", scales::comma(sum(df_plot$n)), "\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% ungroup() %>%filter(demo_num == max(demo_num)) ,
aes(x = demo_num+0.3, y = p-0.01,
label = str_to_title(motive_main), color = motive_main),
nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
scale_color_manual(values = cb_colors) +
theme(legend.position = "none")
}
stacked_bar5(df, variable = gender, fill_label = "", caption_label = "", title_label = "Motive Impediment by Gender")
stacked_bar5(df, variable = ethnicity, fill_label = "", caption_label = "", title_label = "Motive Impediment by Ethnicity")
stacked_bar5(df, variable = education, fill_label = "", caption_label = "", title_label = "Motive Impediment by Education")
stacked_bar5(df, variable = religiosity, fill_label = "", caption_label = "", title_label = "Motive Impediment by Religiosity")
stacked_bar5(df, variable = location, fill_label = "", caption_label = "", title_label = "Motive Impediment by Location")
The following figures show the best treatment by various demographics, for unvaccinated participants. Additionally, observations with missing entries for a best treatment are removed. Grey bars show the percentage of missing observations.
stacked_bar6 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df_plot <- df %>%
filter(vax_status == "unvax" & !is.na(ability_main)) %>%
select(ability_main, !!variable) %>%
group_by(!!variable, ability_main) %>%
count() %>%
group_by(ability_main) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))),
ability_main_num = as.numeric(ability_main))
ggplot(df_plot, aes( x = ability_main_num, y = n, fill =!!variable))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = cb_colors, labels = str_to_title)+
scale_x_continuous(limits = c(NA, length(levels(df$ability_main)) + 2), breaks = 1:length(levels(df$ability_main)),
labels =str_replace_all(c(levels(df$ability_main)), " ", "\n")) +
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = paste("Number of Observations:", scales::comma(sum(df_plot$n)), "\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% filter(ability_main == "other") ,
aes(x = ability_main_num+0.3, y = p-0.01,
label = str_to_title(!!variable), color = !!variable),
nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
scale_color_manual(values = cb_colors) +
theme(legend.position = "none")
}
stacked_bar6(df = df, variable = gender, caption_label = "",
title_label = "Ability Impediment by Gender", fill_label = "")
stacked_bar6(df = df, variable = ethnicity, caption_label = "",
fill_label = "", title_label = "Ability Impediment by Ethnicity")
stacked_bar6(df = df, variable = education, caption_label = "",
fill_label = "", title_label = "Ability Impediment by Education")
stacked_bar6(df = df, variable = religiosity, caption_label = "",
fill_label = "", title_label = "Ability Impediment by Religiosity")
stacked_bar6(df = df, variable = location, caption_label = "",
fill_label = "", title_label = "Ability Impediment by Location")
The data in these visualizations is limited to unvaccinated participants who indicated an ability impediment.
stacked_bar5 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df_plot <- df %>%
filter(vax_status == "unvax" & !is.na(!!variable) & ability %in% c("somewhat hard", "really hard")) %>%
select(ability_main, !!variable) %>%
group_by(!!variable, ability_main) %>%
count() %>%
group_by(!!variable) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))),
demo_num = as.numeric(!!variable))
ggplot(df_plot, aes( x = demo_num, y = n, fill =ability_main)) +
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = cb_colors, labels = str_to_title)+
scale_x_continuous(breaks = 1:length(levels(df %>% pull(!!variable))),
limits = c(NA, length(levels(df %>% pull(!!variable))) +2),
labels =str_replace_all(c(levels(df %>% pull(!!variable))), " ", "\n"))+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = paste("Number of Observations:", scales::comma(sum(df_plot$n)), "\n", caption_label)) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% ungroup() %>%filter(demo_num == max(demo_num)) ,
aes(x = demo_num+0.3, y = p-0.01,
label = str_to_title(ability_main), color = ability_main),
nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
scale_color_manual(values = cb_colors) +
theme(legend.position = "none")
}
stacked_bar5(df, variable = gender,
fill_label = "Ability Impediments by Gender",
title_label = "",
caption_label = "")
stacked_bar5(df, variable = ethnicity,
fill_label = "", title_label = "Ability Impediments by Ethnicity",
caption_label = "")
stacked_bar5(df, variable = education,
fill_label = "",
title_label = "Ability Impediments by Education",
caption_label = "")
stacked_bar5(df, variable = religiosity,
fill_label = "",
title_label = "Ability Impediments by Religiosity",
caption_label = "")
stacked_bar5(df, variable = location,
fill_label = "", title_label = "Ability Impediments by Location",
caption_label = "")