The purpose of this script is to run descriptive statistics for various demographics variables, impediments, and best treatments, on the full pilot data. The current script uses the pilot version 7 data as a placeholder until we get the full pilot data.
Insert Description of Full Pilot Here
Cleaning and Filtering
The data used in this script has already been cleaned in the data cleaning script.
This descriptive statistics script will use the version 7 data, filtered to completes only. Also, obsrvations with missing vax status are dropped. There are 2357 observations.
One additional cleaning step in this analysis remove observations for which vaccination status is “other” (1 observation for v7). We also run the factor_cols.R file, which provides order to variables (ie. location: urban > suburban > rural > other > prefer not to say), since these orderings are not saved in the csv files.
Starred variables have a corresponding variable, with the suffix _num (ie education_num) that provides a numeric ordering of the variables. This order is listed in parenthesis.
| Variable | Description |
|---|---|
| vax_status* | unvax (0) or unvax (1) |
| gender* | male (0) or female (1) |
| age | integer 1 to 99 |
| education* | < high school (1), high school (2), some college (3), 2-year degree (4), 4-year degree (5), graduate degree (6), other, prefer not to say |
| income* | <R5,000 (1), R5,000 – R9,999 (2), R10,000 – R29,999 (3), R30,000 – R49,999 (4), R50,000 – R99,999 (5), > R100,000 (6), other |
| location* | rural(1), suburban (2), urban(3), other, prefer not to say |
| politics* | conservative (1), moderate (2), liberal(3), other, prefer not to say |
| religiosity* | not very religious (1), somewhat religious (2), very religious (3), other, prefer not to say |
| white | 1 if the participant is a white or caucasian, 0 if not |
| ethnicity | asian or indian, black or african, coloured, white or caucasian, other, prefer not to say |
| Variable | Description |
|---|---|
| motive_main | benefit, risk, other |
| benefit_main | if motive_main == benefit: covid not dangerous, had covid already, unlikely to get sick, vaccines don’t work, other reason(s) |
| risk_main | if motive_main == risk: bad side effects, don’t trust pharma, needles/pain, not enough testing, vaccines don’t work, other reason(s), other |
| ability_main | availability, money, time, other |
| availability_main | if ability_main == availability: no vaccines left, too far away, other reason(s) |
| money_main | if ability_main == money: no cash, no insurance, travel costs, other reason(s) |
| time_main | if ability_main == time: childcare, getting off worke, no time to research, other reason(s), other |
| Variable | Description |
|---|---|
| best_treatment | family supports it, rewards for vaxxing, trusted info source, something else, nothering, other |
df_v7 %>%
select_if(is.numeric) %>%
select(vax_status = vax_status_num, age, education = education_num, income = income_num, location=location_num, politics = politics_num, religiosity = religiosity_num, gender = gender_num, white) %>%
describe(quant = c(.25,.75) ) %>%
select(n, mean, sd, se, min, min, first_quartile = Q0.25, median, third_quartile = Q0.75, max) %>%
clean_names(case = "title") %>%
rename(SD = Sd, SE = Se) %>%
kable(caption = "Summary Statistics for Continuous Demographic Variables", digits = 3)%>%
kable_styling(bootstrap_options = c("striped", "hover"))
| N | Mean | SD | SE | Min | First Quartile | Median | Third Quartile | Max | |
|---|---|---|---|---|---|---|---|---|---|
| vax_status | 2357 | 0.573 | 0.495 | 0.010 | 0 | 0 | 1 | 1 | 1 |
| age | 2257 | 29.239 | 10.999 | 0.232 | 18 | 21 | 26 | 34 | 83 |
| education | 2191 | 2.291 | 1.304 | 0.028 | 1 | 1 | 2 | 3 | 6 |
| income | 1722 | 2.066 | 1.526 | 0.037 | 1 | 1 | 1 | 3 | 6 |
| location | 2192 | 2.046 | 0.853 | 0.018 | 1 | 1 | 2 | 3 | 3 |
| politics | 1298 | 1.912 | 0.707 | 0.020 | 1 | 1 | 2 | 2 | 3 |
| religiosity | 1972 | 2.327 | 0.761 | 0.017 | 1 | 2 | 3 | 3 | 3 |
| gender | 2340 | 0.512 | 0.500 | 0.010 | 0 | 0 | 1 | 1 | 1 |
| white | 2357 | 0.076 | 0.264 | 0.005 | 0 | 0 | 0 | 0 | 1 |
df_v7 %>%
select(vax_status, age, education = education_num, income = income_num, location=location_num, politics = politics_num, religiosity = religiosity_num, gender = gender_num, white) %>%
split(.$vax_status) %>%
map(describe) %>%
do.call(rbind, .) %>%
data.frame() %>%
select(n, mean , se) %>%
rownames_to_column() %>%
separate(rowname, c("vax_status", "variable"), sep = "\\.") %>%
filter(variable != "vax_status*") %>%
pivot_longer(cols = c("n", "mean", "se")) %>%
mutate(column = paste(vax_status, name, sep = "_")) %>%
select(column, variable, value) %>%
pivot_wider(names_from = column) %>%
kable(digits = 3,caption = "Summary Statistics for Continuous Demographic Variables by Vaccination Status",
col.names = c("", "N", "Mean", "SE", "N", "Mean", "SE")) %>%
kable_styling(bootstrap_options = c("striped", "hover"))%>%
add_header_above(c(" " = 1, "Unvaccinated" = 3, "Vaccinated" = 3))
| N | Mean | SE | N | Mean | SE | |
|---|---|---|---|---|---|---|
| age | 960 | 28.413 | 0.336 | 1297 | 29.850 | 0.316 |
| education | 934 | 2.336 | 0.045 | 1257 | 2.257 | 0.035 |
| income | 691 | 2.155 | 0.061 | 1031 | 2.006 | 0.046 |
| location | 920 | 2.023 | 0.028 | 1272 | 2.062 | 0.024 |
| politics | 498 | 1.972 | 0.033 | 800 | 1.875 | 0.024 |
| religiosity | 814 | 2.303 | 0.027 | 1158 | 2.344 | 0.022 |
| gender | 997 | 0.463 | 0.016 | 1343 | 0.548 | 0.014 |
| white | 1006 | 0.094 | 0.009 | 1351 | 0.061 | 0.007 |
df_v7 %>%
select(gender, income, ethnicity, education, religion, politics, location) %>%
summarize_factor() %>%
data.frame() %>%
rename(variable = "X.", Percent = "X..2") %>%
select(!X..1) %>%
mutate(variable = ifelse(variable == "", NA, variable)) %>%
fill(data = ., variable) %>%
arrange(variable) %>%
kable(digits = 3,caption = "Summary Statistics for Factor Demographic Variables",
col.names = c("", "Value", "N", "Percent")) %>%
kable_styling(bootstrap_options = c("striped", "hover"))%>%
collapse_rows() %>%
scroll_box( height = "500px")
| Value | N | Percent | |
|---|---|---|---|
| education | < high school | 635 | 26.9 |
| high school | 859 | 36.4 | |
| some college | 429 | 18.2 | |
| 2-year degree | 80 | 3.4 | |
| 4-year degree | 69 | 2.9 | |
| graduate degree | 119 | 5.0 | |
| other | 68 | 2.9 | |
| prefer not to say | 98 | 4.2 | |
| ethnicity | asian or indian | 54 | 2.3 |
| black or african | 1742 | 73.9 | |
| coloured | 228 | 9.7 | |
| white or caucasian | 178 | 7.6 | |
| other | 100 | 4.2 | |
| prefer not to say | 55 | 2.3 | |
| gender | female | 1198 | 50.8 |
| male | 1142 | 48.5 | |
| <Missing> | 17 | 0.7 | |
| income | < R5,000 | 951 | 40.3 |
| R5,000 – R9,999 | 286 | 12.1 | |
| R10,000 – R29,999 | 215 | 9.1 | |
| R30,000 – R49,999 | 72 | 3.1 | |
| R50,000 – R99,999 | 87 | 3.7 | |
| > R100,000 | 111 | 4.7 | |
| other | 98 | 4.2 | |
| <Missing> | 537 | 22.8 | |
| location | urban | 850 | 36.1 |
| suburban | 592 | 25.1 | |
| rural | 750 | 31.8 | |
| other | 34 | 1.4 | |
| prefer not to say | 131 | 5.6 | |
| politics | conservative | 386 | 16.4 |
| moderate | 640 | 27.2 | |
| liberal | 272 | 11.5 | |
| other | 31 | 1.3 | |
| prefer not to say | 1028 | 43.6 | |
| religion | african traditional | 241 | 10.2 |
| christian | 1778 | 75.4 | |
| hinduism | 19 | 0.8 | |
| islam | 56 | 2.4 | |
| no religion | 120 | 5.1 | |
| other | 70 | 3.0 | |
| prefer not to say | 72 | 3.1 | |
| <Missing> | 1 | 0.0 |
df_v7 %>%
select(vax_status, gender, income, ethnicity, education, religion, politics, location) %>%
split(.$vax_status) %>%
map(summarize_factor) %>%
do.call(rbind, .) %>%
data.frame() %>%
rownames_to_column("vax_status") %>%
rename(variable= "X.", Percent = "X..2") %>%
select(!X..1) %>%
mutate(variable = ifelse(variable == "", NA, variable)) %>%
fill(data = ., variable) %>%
mutate(vax_status = str_extract(vax_status, "unvax|vax"),
Percent= as.numeric(Percent)) %>%
pivot_longer(cols = c("N", "Percent")) %>%
mutate(column = paste(vax_status, name, sep = "_")) %>%
select(column, variable, Level, value) %>%
pivot_wider(names_from = "column") %>%
as.data.frame() %>%
arrange(variable) %>%
kable(digits = 3,caption = "Summary Statistics for Factor Demographic Variables by Vaccination Status",
col.names = c("", "Value", "N", "Percent", "N", "Percent")) %>%
kable_styling(bootstrap_options = c("striped", "hover"))%>%
add_header_above(c(" " = 2, "Unvaccinated" = 2, "Vaccinated" = 2))%>%
collapse_rows() %>%
scroll_box( height = "500px")
| Value | N | Percent | N | Percent | |
|---|---|---|---|---|---|
| education | < high school | 255 | 25.3 | 380 | 28.1 |
| high school | 394 | 39.2 | 465 | 34.4 | |
| some college | 160 | 15.9 | 269 | 19.9 | |
| 2-year degree | 32 | 3.2 | 48 | 3.6 | |
| 4-year degree | 27 | 2.7 | 42 | 3.1 | |
| graduate degree | 66 | 6.6 | 53 | 3.9 | |
| other | 22 | 2.2 | 46 | 3.4 | |
| prefer not to say | 50 | 5.0 | 48 | 3.6 | |
| ethnicity | asian or indian | 20 | 2.0 | 34 | 2.5 |
| black or african | 722 | 71.8 | 1020 | 75.5 | |
| coloured | 101 | 10.0 | 127 | 9.4 | |
| white or caucasian | 95 | 9.4 | 83 | 6.1 | |
| other | 30 | 3.0 | 70 | 5.2 | |
| prefer not to say | 38 | 3.8 | 17 | 1.3 | |
| gender | female | 462 | 45.9 | 736 | 54.5 |
| male | 535 | 53.2 | 607 | 44.9 | |
| <Missing> | 9 | 0.9 | 8 | 0.6 | |
| income | < R5,000 | 375 | 37.3 | 576 | 42.6 |
| R5,000 – R9,999 | 106 | 10.5 | 180 | 13.3 | |
| R10,000 – R29,999 | 86 | 8.5 | 129 | 9.5 | |
| R30,000 – R49,999 | 30 | 3.0 | 42 | 3.1 | |
| R50,000 – R99,999 | 40 | 4.0 | 47 | 3.5 | |
| > R100,000 | 54 | 5.4 | 57 | 4.2 | |
| other | 32 | 3.2 | 66 | 4.9 | |
| <Missing> | 283 | 28.1 | 254 | 18.8 | |
| location | urban | 347 | 34.5 | 503 | 37.2 |
| suburban | 247 | 24.6 | 345 | 25.5 | |
| rural | 326 | 32.4 | 424 | 31.4 | |
| other | 9 | 0.9 | 25 | 1.9 | |
| prefer not to say | 77 | 7.7 | 54 | 4.0 | |
| politics | conservative | 138 | 13.7 | 248 | 18.4 |
| moderate | 236 | 23.5 | 404 | 29.9 | |
| liberal | 124 | 12.3 | 148 | 11.0 | |
| other | 12 | 1.2 | 19 | 1.4 | |
| prefer not to say | 496 | 49.3 | 532 | 39.4 | |
| religion | african traditional | 128 | 12.7 | 113 | 8.4 |
| christian | 693 | 68.9 | 1085 | 80.3 | |
| hinduism | 4 | 0.4 | 15 | 1.1 | |
| islam | 33 | 3.3 | 23 | 1.7 | |
| no religion | 66 | 6.6 | 54 | 4.0 | |
| other | 42 | 4.2 | 28 | 2.1 | |
| prefer not to say | 40 | 4.0 | 32 | 2.4 | |
| <Missing> | NA | NA | 1 | 0.1 |
The following tabs show the correlations between demographics, and between demographics and motivation and ability impediments.
numeric_cols <- c("gender_num", "income_num", "education_num",
"religiosity_num", "politics_num", "location_num",
"white", "vax_status_num"
)
df_numeric_demo <- df_v7 %>%
select(all_of(numeric_cols))
colnames(df_numeric_demo) <-str_to_title(str_replace(str_remove(colnames(df_numeric_demo), "_num"), "_", " "))
ggcorrplot(cor(df_numeric_demo, use = "pairwise.complete.obs"), type = "lower", lab = TRUE, lab_size = 12/.pt, tl.cex = 10, colors= c(cb_colors[1], "white", cb_colors[2])) +
custom_theme +
labs(y = "", x = "", title = "Correlation Matrix: \nFor Demographic Variables")+
theme(axis.text.x = element_text(angle = 45, hjust=1))
risky <- df_v7 %>%
mutate(
risky = if_else(motive_main == "risk", 1L, 0L)) %>%
select(risky, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:8]
against_beliefs <- df_v7 %>%
mutate(
against_beliefs = if_else(motive_main == "belief", 1L, 0L)) %>%
select(against_beliefs, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:8]
no_benefits <- df_v7 %>%
mutate(
no_benefits = if_else(motive_main == "benefit", 1L, 0L)) %>%
select(no_benefits, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:8]
final_mat <-
tibble(
against_beliefs,
no_benefits, risky
) %>%
as.matrix()
rownames(final_mat) <- c("gender", "income", "education", "religiosity", "politics", "location", "white")
colnames(final_mat) <- c( "against_beliefs", "no_benefits", "risky")
ggcorrplot(final_mat, lab = TRUE, lab_size = 12/.pt, tl.cex = 10, colors= c(cb_colors[1], "white", cb_colors[2])) +
custom_theme +
labs(y = "", x = "", title = "Correlation Matrix: \nMotivation and Demographic Variables", caption = "Against beliefs is removed do to their being no correlation. \nIf the full pilot does show a relationship, it will visualized here.")+
theme(axis.text.x = element_text(angle = 45, hjust=1))
availability <- df_v7 %>%
mutate(
availability = if_else(ability_main == "availability", 1L, 0L)) %>%
select(availability, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:8]
money <- df_v7 %>%
mutate(
money = if_else(ability_main == "money", 1L, 0L)) %>%
select(money, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:8]
time <- df_v7 %>%
mutate(
time = if_else(ability_main == "time", 1L, 0L)) %>%
select(time, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
cor(use = "pairwise.complete.obs") %>%
.[1, 2:8]
final_mat <-
tibble(
availability,
money, time
) %>%
as.matrix()
rownames(final_mat) <- c("gender", "income", "education", "religiosity", "politics", "location", "white")
colnames(final_mat) <- c( "availability", "money", "time")
ggcorrplot(final_mat, lab = TRUE, lab_size = 12/.pt, tl.cex = 10, colors= c(cb_colors[1], "white", cb_colors[2])) +
custom_theme +
labs(y = "", x = "", title = "Correlation Matrix: \nAbility and Demographic Variables")+
theme(axis.text.x = element_text(angle = 45, hjust=1))
Next, we show the number of participants in each of the twelve forks, and the percentage of participants at the vaccination status level.
df_v7 %>%
group_by(vax_status, ability, motive) %>%
count() %>%
drop_na(ability, motive) %>%
ungroup() %>%
group_by(vax_status) %>%
mutate(`% of vax status` = round(100 * n/sum(n, na.rm = T), 2),
ability = str_to_title(ifelse(ability == "yes", "ability", "no ability")),
motive = str_to_title(ifelse(motive == "yes", "motive", "no motive")),
vax_status = str_to_title(ifelse(vax_status == "vax", "Vaccinated", "Unvaccinated"))) %>%
ungroup() %>%
arrange(vax_status, desc(n)) %>%
kable(col.names =
c("", "", "", "Count", "Percentage within Vaccination Status"),
align = "c",
caption = "Distribution of forking segments of impediments") %>%
kable_styling()
| Count | Percentage within Vaccination Status | |||
|---|---|---|---|---|
| Unvaccinated | Ability | No Motive | 684 | 67.99 |
| Unvaccinated | No Ability | No Motive | 260 | 25.84 |
| Unvaccinated | Ability | Motive | 42 | 4.17 |
| Unvaccinated | No Ability | Motive | 20 | 1.99 |
| Vaccinated | Ability | Motive | 733 | 54.34 |
| Vaccinated | Ability | No Motive | 431 | 31.95 |
| Vaccinated | No Ability | Motive | 98 | 7.26 |
| Vaccinated | No Ability | No Motive | 87 | 6.45 |
stacked_bar <- function(df, variable,filter_phrase, fill_label, title_label, caption_label){
variable <- enquo(variable)
df %>%
filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
select(!!variable, vax_status, vax_status_num) %>%
group_by(!!variable, vax_status, vax_status_num) %>%
count() %>%
group_by(vax_status_num, vax_status) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent)) %>%
ggplot(aes( x = vax_status_num, y = n, fill =as.factor(!!variable)))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_x_continuous(breaks = c(0, 1), labels = c("Unvaccinated", "Vaccinated"))+
scale_fill_manual(values = cb_colors, labels = str_to_title)+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill= fill_label,
title = title_label,
caption = caption_label) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%"))
}
stacked_bar2 <- function(df, variable,filter_phrase, fill_label, title_label, caption_label){
variable <- enquo(variable)
df %>%
filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
select(!!variable, vax_status, vax_status_num) %>%
group_by(!!variable, vax_status, vax_status_num) %>%
count() %>%
group_by(vax_status_num, vax_status) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n))))) %>%
ggplot(aes( x = vax_status_num, y = n, fill =as.factor(!!variable)))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_x_continuous(breaks = c(0, 1), labels = c("Unvaccinated", "Vaccinated"), limits = c(NA, 2.4))+
scale_fill_manual(values = cb_colors, labels = str_to_title)+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill= fill_label,
title = title_label,
caption = caption_label) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% filter(vax_status == "vax") , aes(x = vax_status_num+0.3, y = p- 0.01,
label = str_to_title(!!variable), color = !!variable),
nudge_x = 0.15, direction = "y", hjust = "left", size = 16/.pt) +
scale_color_manual(values = cb_colors) +
theme(legend.position = "none")
}
First, we show the percentage of participants by vaccination status.
df_v7 %>%
select(vax_status) %>%
group_by(vax_status) %>%
count() %>%
ungroup() %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = "")) %>%
ggplot(aes( x = 1, y = n, fill = vax_status))+
geom_bar(position = "fill", stat ="identity", width = .7, color = "white") +
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
coord_flip() +
custom_theme +
scale_x_continuous(breaks = c(1), limits = c(0.6, 1.8)) +
scale_fill_manual(values = c(cb_colors[3], cb_colors[6]), labels = c("Unvax", "Vax")) +
labs(x = "", y ="", title = "Percentage of Participants by Vaccination Status")+
geom_label(aes(x = 1.6, y = .29), label = "Vaccinated", size = 16/.pt, label.size = NA, fill = "white", color = cb_colors[6])+
geom_label(aes(x = 1.6, y = 0.79), label = "Unvaccinated", label.size = NA, fill = "white", color = cb_colors[3], size = 16/.pt) +
theme(legend.position = "none",
axis.text.y = element_blank()) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%"))
This section visualizes various demographic variables by vaccination status.
df_v7 %>%
stacked_bar(variable = gender,
filter_phrase = NULL,
fill_label = "Gender",
title_label= "Share of Participants by Gender\nand Vaccination Status",
caption_label= "")
df_v7 %>%
select(age, vax_status) %>%
ggplot(aes(x = age, group = vax_status, fill = vax_status)) +
geom_density(alpha = 0.5) +
custom_theme +
scale_x_continuous(limits = c(0, 100), breaks = seq(0, 100, 10))+
scale_fill_manual(values = c(cb_colors[3], cb_colors[6]))+
labs(x = "Age", y = "Density", fill= "Vax Status",
title = paste("Age Distribution by <span style='color:", cb_colors[6], ";'>Vaccinated</span> and <span style='color:", cb_colors[3], ";'>Unvaccinated</span>", sep = "")) +
theme(legend.position = "none",
plot.title = element_markdown(size = 20))
df_v7 %>%
stacked_bar(variable = education,
filter_phrase = NULL,
fill_label = "Education",
title_label= "Share of Participants by Education\nand Vaccination Status",
caption_label= "")
df_v7 %>%
stacked_bar(variable = religiosity,
filter_phrase = NULL,
fill_label = "Religiosity",
title_label= "Share of Participants by Religiosity\nand Vaccination Status",
caption_label= "")
df_v7 %>%
stacked_bar(variable = religion,
filter_phrase = NULL,
fill_label = "Religion",
title_label= "Share of Participants by Religion\nand Vaccination Status",
caption_label= "")
df_v7 %>%
stacked_bar(variable = income,
filter_phrase = NULL,
fill_label = "Income",
title_label= "Share of Participants by Income\nand Vaccination Status",
caption_label= "")
df_v7 %>%
stacked_bar(variable = location,
filter_phrase = NULL,
fill_label = "Location",
title_label= "Share of Participants by Location\nand Vaccination Status",
caption_label= "")
df_v7 %>%
stacked_bar(variable = ethnicity,
filter_phrase = NULL,
fill_label = "Ethnicity",
title_label= "Share of Participants by Ethnicity\nand Vaccination Status",
caption_label= "")
df_v7 %>%
stacked_bar(variable = politics,
filter_phrase = NULL,
fill_label = "Politics",
title_label= "Share of Participants by Politics\nand Vaccination Status",
caption_label= "")
This section visualizes various demographic variables by motivation impediments. The data used in this section are limited to participants who indicated a motive impediment.
df_v7 %>%
stacked_bar2(variable = motive_main,
filter_phrase = 'motive == "no"',
fill_label = "",
title_label= "Share of Participants by Motive\nand Vaccination Status",
caption_label= "")
For participants who identified their main motive impediment as risk.
df_v7 %>%
stacked_bar2(variable = risk_main,
filter_phrase = 'motive == "no" & motive_main == "risk"',
fill_label = "",
title_label= "Share of Participants by Risk\nand Vaccination Status",
caption_label= "")
For participants who identified their main motive impediment as benefit.
df_v7 %>%
stacked_bar2(variable = benefit_main,
filter_phrase = 'motive == "no" & motive_main == "benefit"',
fill_label = "",
title_label= "Share of Participants by Benefit\nand Vaccination Status",
caption_label= "")
This section visualizes various demographic variables by ability impediments. The data used in this section are limited to participants who indicated a ability impediment.
df_v7 %>%
stacked_bar2(variable = ability_main,
filter_phrase = 'ability == "no"',
fill_label = "",
title_label= "Share of Participants by Ability\nand Vaccination Status",
caption_label= "")
For participants who identified their main ability impediment as availability.
df_v7 %>%
stacked_bar2(variable = availability_main,
filter_phrase = 'ability == "no" & ability_main == "availability"',
fill_label = "",
title_label= "Share of Participants by Availability\nand Vaccination Status",
caption_label= "")
For participants who identified their main ability impediment as money.
df_v7 %>%
stacked_bar2(variable = money_main,
filter_phrase = 'ability == "no" & ability_main == "money"',
fill_label = "",
title_label= "Share of Participants by Money\nand Vaccination Status",
caption_label= "")
For participants who identified their main ability impediment as time.
df_v7 %>%
stacked_bar2(variable = time_main,
filter_phrase = 'ability == "no" & ability_main == "time"',
fill_label = "",
title_label= "Share of Participants by Time\nand Vaccination Status",
caption_label= "")
This figure shows the percentage of participants by best treatment.
df_v7 %>%
stacked_bar2(variable = best_treatment,
filter_phrase = NULL,
fill_label = "Best Treatment",
title_label= "Percentage of Participants by Best Treatment\nand Vaccination Status",
caption_label= "")
This section visualizas demographics by best treatment, motivation impediments, and ability impediments for the unvaccinated participants.
The following figures show the best treatment by various demographics, for unvaccinated participants. Additionally, observations with missing entries for a best treatment are removed. Grey bars show the percentage of missing observations.
stacked_bar3 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df %>%
filter(vax_status == "unvax" & !is.na(best_treatment)) %>%
select(best_treatment, !!variable) %>%
group_by(!!variable, best_treatment) %>%
count() %>%
group_by(best_treatment) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))),
best_treatment_num = as.numeric(best_treatment)) %>%
ggplot(aes( x = best_treatment_num, y = n, fill =!!variable))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = cb_colors, labels = str_to_title)+
scale_x_continuous(limits = c(NA, 8), breaks = 1:6, labels =str_replace_all(c(levels(df_v7$best_treatment)), " ", "\n")) +
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% filter(best_treatment == "other") ,
aes(x = best_treatment_num+0.3, y = p-0.01,
label = str_to_title(!!variable), color = !!variable),
nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
scale_color_manual(values = cb_colors) +
theme(legend.position = "none")
}
stacked_bar3(df = df_v7,
variable = gender,
fill_label ="",
title_label = "Gender by Best Treatment", caption_label = "")
stacked_bar3(df = df_v7,
variable = ethnicity,
fill_label ="",
title_label = "Ethnicity by Best Treatment", caption_label = "")
stacked_bar3(df = df_v7,
variable = education,
fill_label ="",
title_label = "Education by Best Treatment", caption_label = "")
stacked_bar3(df = df_v7,
variable = location,
fill_label ="",
title_label = "Location by Best Treatment", caption_label = "")
stacked_bar3(df = df_v7,
variable = politics,
fill_label ="",
title_label = "Politics by Best Treatment", caption_label = "")
stacked_bar3(df = df_v7,
variable = religiosity,
fill_label ="",
title_label = "Religiosity by Best Treatment", caption_label = "")
The following figures show the best treatment by various demographics, for the unvaccinated group. Additionally, observations with missing entries for a given demographic variable are removed. Grey bars show the percentage of missing observations.
stacked_bar4 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df %>%
filter(vax_status == "unvax" & !is.na(!!variable)) %>%
select(best_treatment, !!variable) %>%
group_by(!!variable, best_treatment) %>%
count() %>%
group_by(!!variable) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))),
demo_num = as.numeric(!!variable)) %>%
ggplot(aes( x = demo_num, y = n, fill =best_treatment))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = cb_colors, labels = str_to_title)+
scale_x_continuous(breaks = 1:length(levels(df_v7 %>% pull(!!variable))),
limits = c(NA, length(levels(df_v7 %>% pull(!!variable))) +2),
labels =str_replace_all(c(levels(df_v7 %>% pull(!!variable))), " ", "\n"))+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% ungroup() %>%filter(demo_num == max(demo_num)) ,
aes(x = demo_num+0.3, y = p-0.01,
label = str_to_title(best_treatment), color = best_treatment),
nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
scale_color_manual(values = cb_colors) +
theme(legend.position = "none")
}
stacked_bar4(df = df_v7, variable = gender, fill_label = "", title_label = "Best Treatment by Gender", caption_label = "")
stacked_bar4(df = df_v7, variable = ethnicity, fill_label = "", title_label = "Best Treatment by Ethnicity", caption_label = "")
stacked_bar4(df = df_v7, variable = education, fill_label = "", title_label = "Best Treatment by Education", caption_label = "")
stacked_bar4(df = df_v7, variable = religiosity, fill_label = "", title_label = "Best Treatment by Religiosity", caption_label = "")
stacked_bar4(df = df_v7, variable = politics, fill_label = "", title_label = "Best Treatment by Politics", caption_label = "")
stacked_bar4(df = df_v7, variable = location, fill_label = "", title_label = "Best Treatment by Location", caption_label = "")
The following figures show the best treatment by various demographics, for unvaccinated participants. Additionally, observations with missing entries for a best treatment are removed. Grey bars show the percentage of missing observations.
stacked_bar6 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df %>%
filter(vax_status == "unvax" & !is.na(motive_main)) %>%
select(motive_main, !!variable) %>%
group_by(!!variable, motive_main) %>%
count() %>%
group_by(motive_main) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))),
motive_main_num = as.numeric(motive_main)) %>%
ggplot(aes( x = motive_main_num, y = n, fill =!!variable))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = cb_colors, labels = str_to_title)+
scale_x_continuous(limits = c(NA, length(levels(df_v7$motive_main)) + 2), breaks = 1:length(levels(df_v7$motive_main)),
labels =str_replace_all(c(levels(df_v7$motive_main)), " ", "\n")) +
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% filter(motive_main == "other") ,
aes(x = motive_main_num+0.3, y = p-0.01,
label = str_to_title(!!variable), color = !!variable),
nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
scale_color_manual(values = cb_colors) +
theme(legend.position = "none")
}
stacked_bar6(df = df_v7, variable = gender, caption_label = "",
title_label = "Motive Impediment by Gender", fill_label = "")
stacked_bar6(df = df_v7, variable = ethnicity, caption_label = "",
fill_label = "", title_label = "Motive Impediment by Ethnicity")
stacked_bar6(df = df_v7, variable = education, caption_label = "",
fill_label = "", title_label = "Motive Impediment by Education")
stacked_bar6(df = df_v7, variable = religiosity, caption_label = "",
fill_label = "", title_label = "Motive Impediment by Religiosity")
stacked_bar6(df = df_v7, variable = politics, caption_label = "",
fill_label = "", title_label = "Motive Impediment by Politics")
stacked_bar6(df = df_v7, variable = location, caption_label = "",
fill_label = "", title_label = "Motive Impediment by Location")
Data used in this section is limited to unvaccinated participants who indicataed a motivation impediment.
stacked_bar5 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df %>%
filter(vax_status == "unvax" & !is.na(!!variable) & motive == "no") %>%
select(motive_main, !!variable) %>%
group_by(!!variable, motive_main) %>%
count() %>%
group_by(!!variable) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))),
demo_num = as.numeric(!!variable)) %>%
ggplot(aes( x = demo_num, y = n, fill =motive_main))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = cb_colors, labels = str_to_title)+
scale_x_continuous(breaks = 1:length(levels(df_v7 %>% pull(!!variable))),
limits = c(NA, length(levels(df_v7 %>% pull(!!variable))) +2),
labels =str_replace_all(c(levels(df_v7 %>% pull(!!variable))), " ", "\n"))+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% ungroup() %>%filter(demo_num == max(demo_num)) ,
aes(x = demo_num+0.3, y = p-0.01,
label = str_to_title(motive_main), color = motive_main),
nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
scale_color_manual(values = cb_colors) +
theme(legend.position = "none")
}
stacked_bar5(df_v7, variable = gender, fill_label = "", caption_label = "", title_label = "Motive Impediment by Gender")
stacked_bar5(df_v7, variable = ethnicity, fill_label = "", caption_label = "", title_label = "Motive Impediment by Ethnicity")
stacked_bar5(df_v7, variable = education, fill_label = "", caption_label = "", title_label = "Motive Impediment by Education")
stacked_bar5(df_v7, variable = religiosity, fill_label = "", caption_label = "", title_label = "Motive Impediment by Religiosity")
stacked_bar5(df_v7, variable = location, fill_label = "", caption_label = "", title_label = "Motive Impediment by Location")
stacked_bar5(df_v7, variable = politics, fill_label = "", caption_label = "", title_label = "Motive Impediment by Politics")
The following figures show the best treatment by various demographics, for unvaccinated participants. Additionally, observations with missing entries for a best treatment are removed. Grey bars show the percentage of missing observations.
stacked_bar6 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df %>%
filter(vax_status == "unvax" & !is.na(ability_main)) %>%
select(ability_main, !!variable) %>%
group_by(!!variable, ability_main) %>%
count() %>%
group_by(ability_main) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))),
ability_main_num = as.numeric(ability_main)) %>%
ggplot(aes( x = ability_main_num, y = n, fill =!!variable))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = cb_colors, labels = str_to_title)+
scale_x_continuous(limits = c(NA, length(levels(df_v7$ability_main)) + 2), breaks = 1:length(levels(df_v7$ability_main)),
labels =str_replace_all(c(levels(df_v7$ability_main)), " ", "\n")) +
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% filter(ability_main == "other") ,
aes(x = ability_main_num+0.3, y = p-0.01,
label = str_to_title(!!variable), color = !!variable),
nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
scale_color_manual(values = cb_colors) +
theme(legend.position = "none")
}
stacked_bar6(df = df_v7, variable = gender, caption_label = "",
title_label = "Ability Impediment by Gender", fill_label = "")
stacked_bar6(df = df_v7, variable = ethnicity, caption_label = "",
fill_label = "", title_label = "Ability Impediment by Ethnicity")
stacked_bar6(df = df_v7, variable = education, caption_label = "",
fill_label = "", title_label = "Ability Impediment by Education")
stacked_bar6(df = df_v7, variable = religiosity, caption_label = "",
fill_label = "", title_label = "Ability Impediment by Religiosity")
stacked_bar6(df = df_v7, variable = politics, caption_label = "",
fill_label = "", title_label = "Ability Impediment by Politics")
stacked_bar6(df = df_v7, variable = location, caption_label = "",
fill_label = "", title_label = "Ability Impediment by Location")
The data in these visualizations is limited to unvaccinated participants who indicated an ability impediment.
stacked_bar5 <- function(df, variable, fill_label, title_label, caption_label){
variable <- enquo(variable)
df %>%
filter(vax_status == "unvax" & !is.na(!!variable) & ability == "no") %>%
select(ability_main, !!variable) %>%
group_by(!!variable, ability_main) %>%
count() %>%
group_by(!!variable) %>%
mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""),
percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent),
p = rev(cumsum(rev(n/sum(n)))),
demo_num = as.numeric(!!variable)) %>%
ggplot(aes( x = demo_num, y = n, fill =ability_main))+
geom_bar(position = "fill", stat ="identity", color = "white", width = .7) +
custom_theme +
scale_fill_manual(values = cb_colors, labels = str_to_title)+
scale_x_continuous(breaks = 1:length(levels(df_v7 %>% pull(!!variable))),
limits = c(NA, length(levels(df_v7 %>% pull(!!variable))) +2),
labels =str_replace_all(c(levels(df_v7 %>% pull(!!variable))), " ", "\n"))+
geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") +
labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
geom_text_repel(data = . %>% ungroup() %>%filter(demo_num == max(demo_num)) ,
aes(x = demo_num+0.3, y = p-0.01,
label = str_to_title(ability_main), color = ability_main),
nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
scale_color_manual(values = cb_colors) +
theme(legend.position = "none")
}
stacked_bar5(df_v7, variable = gender,
fill_label = "Ability Impediments by Gender",
title_label = "",
caption_label = "")
stacked_bar5(df_v7, variable = ethnicity,
fill_label = "", title_label = "Ability Impediments by Ethnicity",
caption_label = "")
stacked_bar5(df_v7, variable = education,
fill_label = "",
title_label = "Ability Impediments by Education",
caption_label = "")
stacked_bar5(df_v7, variable = religiosity,
fill_label = "",
title_label = "Ability Impediments by Religiosity",
caption_label = "")
stacked_bar5(df_v7, variable = location,
fill_label = "", title_label = "Ability Impediments by Location",
caption_label = "")
stacked_bar5(df_v7, variable = politics, fill_label = "",
title_label = "Ability Impediments by Politics", caption_label = "")