The purpose of this script is to run descriptive statistics for various demographics variables, impediments, and best treatments, on the full pilot data. The current script uses the pilot version 7 data as a placeholder until we get the full pilot data.

Insert Description of Full Pilot Here

Cleaning and Filtering

The data used in this script has already been cleaned in the data cleaning script.

This descriptive statistics script will use the version 7 data, filtered to completes only. Also, obsrvations with missing vax status are dropped. There are 2357 observations.

One additional cleaning step in this analysis remove observations for which vaccination status is “other” (1 observation for v7). We also run the factor_cols.R file, which provides order to variables (ie. location: urban > suburban > rural > other > prefer not to say), since these orderings are not saved in the csv files.

Data Dictionary

Demographics

Starred variables have a corresponding variable, with the suffix _num (ie education_num) that provides a numeric ordering of the variables. This order is listed in parenthesis.

Variable Description
vax_status* unvax (0) or unvax (1)
gender* male (0) or female (1)
age integer 1 to 99
education* < high school (1), high school (2), some college (3), 2-year degree (4), 4-year degree (5), graduate degree (6), other, prefer not to say
income* <R5,000 (1), R5,000 – R9,999 (2), R10,000 – R29,999 (3), R30,000 – R49,999 (4), R50,000 – R99,999 (5), > R100,000 (6), other
location* rural(1), suburban (2), urban(3), other, prefer not to say
politics* conservative (1), moderate (2), liberal(3), other, prefer not to say
religiosity* not very religious (1), somewhat religious (2), very religious (3), other, prefer not to say
white 1 if the participant is a white or caucasian, 0 if not
ethnicity asian or indian, black or african, coloured, white or caucasian, other, prefer not to say

Impediments

Variable Description
motive_main benefit, risk, other
benefit_main if motive_main == benefit: covid not dangerous, had covid already, unlikely to get sick, vaccines don’t work, other reason(s)
risk_main if motive_main == risk: bad side effects, don’t trust pharma, needles/pain, not enough testing, vaccines don’t work, other reason(s), other
ability_main availability, money, time, other
availability_main if ability_main == availability: no vaccines left, too far away, other reason(s)
money_main if ability_main == money: no cash, no insurance, travel costs, other reason(s)
time_main if ability_main == time: childcare, getting off worke, no time to research, other reason(s), other

Best Treatment

Variable Description
best_treatment family supports it, rewards for vaxxing, trusted info source, something else, nothering, other

Summary Statistics Tables

Continuous Variables

df_v7 %>%
  select_if(is.numeric) %>%
  select(vax_status = vax_status_num, age, education = education_num, income = income_num, location=location_num, politics = politics_num, religiosity = religiosity_num, gender = gender_num, white) %>%
  describe(quant = c(.25,.75) ) %>%
  select(n, mean, sd, se, min, min, first_quartile = Q0.25, median, third_quartile = Q0.75, max) %>%
  clean_names(case = "title") %>%
  rename(SD = Sd, SE = Se) %>%
  kable(caption = "Summary Statistics for Continuous Demographic Variables", digits = 3)%>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Summary Statistics for Continuous Demographic Variables
N Mean SD SE Min First Quartile Median Third Quartile Max
vax_status 2357 0.573 0.495 0.010 0 0 1 1 1
age 2257 29.239 10.999 0.232 18 21 26 34 83
education 2191 2.291 1.304 0.028 1 1 2 3 6
income 1722 2.066 1.526 0.037 1 1 1 3 6
location 2192 2.046 0.853 0.018 1 1 2 3 3
politics 1298 1.912 0.707 0.020 1 1 2 2 3
religiosity 1972 2.327 0.761 0.017 1 2 3 3 3
gender 2340 0.512 0.500 0.010 0 0 1 1 1
white 2357 0.076 0.264 0.005 0 0 0 0 1
df_v7 %>%
  select(vax_status, age, education = education_num, income = income_num, location=location_num, politics = politics_num, religiosity = religiosity_num, gender = gender_num, white) %>% 
  split(.$vax_status) %>% 
  map(describe) %>% 
  do.call(rbind, .) %>%
  data.frame() %>%
  select(n, mean , se) %>%
  rownames_to_column() %>%
  separate(rowname, c("vax_status", "variable"), sep = "\\.") %>%
  filter(variable != "vax_status*") %>%
  pivot_longer(cols = c("n", "mean", "se")) %>%
  mutate(column = paste(vax_status, name, sep = "_")) %>%
  select(column, variable, value) %>%
  pivot_wider(names_from = column) %>%
  kable(digits = 3,caption = "Summary Statistics for Continuous Demographic Variables by Vaccination Status", 
        col.names = c("", "N", "Mean", "SE", "N", "Mean", "SE")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))%>%
  add_header_above(c(" " = 1, "Unvaccinated" = 3, "Vaccinated" = 3))
Summary Statistics for Continuous Demographic Variables by Vaccination Status
Unvaccinated
Vaccinated
N Mean SE N Mean SE
age 960 28.413 0.336 1297 29.850 0.316
education 934 2.336 0.045 1257 2.257 0.035
income 691 2.155 0.061 1031 2.006 0.046
location 920 2.023 0.028 1272 2.062 0.024
politics 498 1.972 0.033 800 1.875 0.024
religiosity 814 2.303 0.027 1158 2.344 0.022
gender 997 0.463 0.016 1343 0.548 0.014
white 1006 0.094 0.009 1351 0.061 0.007

Factor Variables

df_v7 %>%
  select(gender, income, ethnicity, education, religion, politics, location) %>%
  summarize_factor() %>%
  data.frame() %>%
  rename(variable = "X.", Percent = "X..2") %>%
  select(!X..1) %>%
  mutate(variable = ifelse(variable == "", NA, variable)) %>%
  fill(data = ., variable) %>%
  arrange(variable) %>%
  kable(digits = 3,caption = "Summary Statistics for Factor Demographic Variables", 
        col.names = c("", "Value", "N", "Percent")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))%>%
    collapse_rows() %>%
  scroll_box( height = "500px")
Summary Statistics for Factor Demographic Variables
Value N Percent
education < high school 635 26.9
high school 859 36.4
some college 429 18.2
2-year degree 80 3.4
4-year degree 69 2.9
graduate degree 119 5.0
other 68 2.9
prefer not to say 98 4.2
ethnicity asian or indian 54 2.3
black or african 1742 73.9
coloured 228 9.7
white or caucasian 178 7.6
other 100 4.2
prefer not to say 55 2.3
gender female 1198 50.8
male 1142 48.5
<Missing> 17 0.7
income < R5,000 951 40.3
R5,000 – R9,999 286 12.1
R10,000 – R29,999 215 9.1
R30,000 – R49,999 72 3.1
R50,000 – R99,999 87 3.7
> R100,000 111 4.7
other 98 4.2
<Missing> 537 22.8
location urban 850 36.1
suburban 592 25.1
rural 750 31.8
other 34 1.4
prefer not to say 131 5.6
politics conservative 386 16.4
moderate 640 27.2
liberal 272 11.5
other 31 1.3
prefer not to say 1028 43.6
religion african traditional 241 10.2
christian 1778 75.4
hinduism 19 0.8
islam 56 2.4
no religion 120 5.1
other 70 3.0
prefer not to say 72 3.1
<Missing> 1 0.0
df_v7 %>%
  select(vax_status, gender, income, ethnicity, education, religion, politics, location) %>%
  split(.$vax_status) %>% 
  map(summarize_factor) %>% 
  do.call(rbind, .) %>%
  data.frame() %>%
  rownames_to_column("vax_status") %>%
  rename(variable= "X.", Percent = "X..2") %>%
  select(!X..1)  %>%
  mutate(variable = ifelse(variable == "", NA, variable)) %>%
  fill(data = ., variable) %>%
  mutate(vax_status = str_extract(vax_status, "unvax|vax"), 
         Percent= as.numeric(Percent)) %>%
  pivot_longer(cols = c("N", "Percent")) %>%
  mutate(column = paste(vax_status, name, sep = "_")) %>%
  select(column, variable, Level, value) %>%
  pivot_wider(names_from = "column")  %>%
  as.data.frame() %>%
    arrange(variable) %>%
  kable(digits = 3,caption = "Summary Statistics for Factor Demographic Variables by Vaccination Status", 
        col.names = c("", "Value", "N", "Percent", "N", "Percent")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))%>%
  add_header_above(c(" " = 2, "Unvaccinated" = 2, "Vaccinated" = 2))%>%
  collapse_rows() %>%
  scroll_box( height = "500px") 
Summary Statistics for Factor Demographic Variables by Vaccination Status
Unvaccinated
Vaccinated
Value N Percent N Percent
education < high school 255 25.3 380 28.1
high school 394 39.2 465 34.4
some college 160 15.9 269 19.9
2-year degree 32 3.2 48 3.6
4-year degree 27 2.7 42 3.1
graduate degree 66 6.6 53 3.9
other 22 2.2 46 3.4
prefer not to say 50 5.0 48 3.6
ethnicity asian or indian 20 2.0 34 2.5
black or african 722 71.8 1020 75.5
coloured 101 10.0 127 9.4
white or caucasian 95 9.4 83 6.1
other 30 3.0 70 5.2
prefer not to say 38 3.8 17 1.3
gender female 462 45.9 736 54.5
male 535 53.2 607 44.9
<Missing> 9 0.9 8 0.6
income < R5,000 375 37.3 576 42.6
R5,000 – R9,999 106 10.5 180 13.3
R10,000 – R29,999 86 8.5 129 9.5
R30,000 – R49,999 30 3.0 42 3.1
R50,000 – R99,999 40 4.0 47 3.5
> R100,000 54 5.4 57 4.2
other 32 3.2 66 4.9
<Missing> 283 28.1 254 18.8
location urban 347 34.5 503 37.2
suburban 247 24.6 345 25.5
rural 326 32.4 424 31.4
other 9 0.9 25 1.9
prefer not to say 77 7.7 54 4.0
politics conservative 138 13.7 248 18.4
moderate 236 23.5 404 29.9
liberal 124 12.3 148 11.0
other 12 1.2 19 1.4
prefer not to say 496 49.3 532 39.4
religion african traditional 128 12.7 113 8.4
christian 693 68.9 1085 80.3
hinduism 4 0.4 15 1.1
islam 33 3.3 23 1.7
no religion 66 6.6 54 4.0
other 42 4.2 28 2.1
prefer not to say 40 4.0 32 2.4
<Missing> NA NA 1 0.1

Associations

The following tabs show the correlations between demographics, and between demographics and motivation and ability impediments.

Demographics

numeric_cols <- c("gender_num", "income_num", "education_num", 
                   "religiosity_num", "politics_num", "location_num", 
                   "white", "vax_status_num"
                   )

df_numeric_demo <- df_v7 %>%
  select(all_of(numeric_cols))
colnames(df_numeric_demo) <-str_to_title(str_replace(str_remove(colnames(df_numeric_demo), "_num"), "_", " "))

ggcorrplot(cor(df_numeric_demo, use = "pairwise.complete.obs"), type = "lower", lab = TRUE, lab_size = 12/.pt, tl.cex = 10, colors= c(cb_colors[1], "white", cb_colors[2])) + 
  custom_theme + 
  labs(y = "", x = "", title = "Correlation Matrix: \nFor Demographic Variables")+
  theme(axis.text.x = element_text(angle = 45, hjust=1))

Motivation

risky <- df_v7 %>% 
  mutate(
    risky = if_else(motive_main == "risk", 1L, 0L)) %>%
  select(risky, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
  cor(use = "pairwise.complete.obs") %>%
  .[1, 2:8]

against_beliefs <- df_v7 %>% 
  mutate(
    against_beliefs = if_else(motive_main == "belief", 1L, 0L)) %>%
  select(against_beliefs, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
  cor(use = "pairwise.complete.obs") %>%
  .[1, 2:8]

no_benefits <- df_v7 %>% 
  mutate(
    no_benefits = if_else(motive_main == "benefit", 1L, 0L)) %>%
  select(no_benefits, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
  cor(use = "pairwise.complete.obs") %>%
  .[1, 2:8]

final_mat <-
  tibble(
    against_beliefs, 
    no_benefits, risky
  ) %>% 
  as.matrix()

rownames(final_mat) <- c("gender", "income", "education", "religiosity", "politics", "location", "white")
colnames(final_mat) <- c( "against_beliefs", "no_benefits", "risky")

ggcorrplot(final_mat,  lab = TRUE, lab_size = 12/.pt, tl.cex = 10, colors= c(cb_colors[1], "white", cb_colors[2])) + 
  custom_theme + 
  labs(y = "", x = "", title = "Correlation Matrix: \nMotivation and Demographic Variables", caption = "Against beliefs is removed do to their being no correlation. \nIf the full pilot does show a relationship, it will visualized here.")+
  theme(axis.text.x = element_text(angle = 45, hjust=1)) 

Ability

availability <- df_v7 %>% 
  mutate(
    availability = if_else(ability_main == "availability", 1L, 0L)) %>%
  select(availability, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
  cor(use = "pairwise.complete.obs") %>%
  .[1, 2:8]

money <- df_v7 %>% 
  mutate(
    money = if_else(ability_main == "money", 1L, 0L)) %>%
  select(money, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
  cor(use = "pairwise.complete.obs") %>%
  .[1, 2:8]

time <- df_v7 %>% 
  mutate(
    time = if_else(ability_main == "time", 1L, 0L)) %>%
  select(time, gender_num, income_num, education_num, religiosity_num, politics_num, location_num, white) %>%
  cor(use = "pairwise.complete.obs") %>%
  .[1, 2:8]

final_mat <-
  tibble(
    availability, 
    money, time
  ) %>% 
  as.matrix()

rownames(final_mat) <- c("gender", "income", "education", "religiosity", "politics", "location", "white")
colnames(final_mat) <- c( "availability", "money", "time")

ggcorrplot(final_mat,  lab = TRUE, lab_size = 12/.pt, tl.cex = 10, colors= c(cb_colors[1], "white", cb_colors[2])) + 
  custom_theme + 
  labs(y = "", x = "", title = "Correlation Matrix: \nAbility and Demographic Variables")+
  theme(axis.text.x = element_text(angle = 45, hjust=1)) 

Forks

Next, we show the number of participants in each of the twelve forks, and the percentage of participants at the vaccination status level.

df_v7 %>% 
  group_by(vax_status, ability, motive) %>%
  count() %>%
  drop_na(ability, motive) %>% 
  ungroup() %>%
  group_by(vax_status) %>% 
  mutate(`% of vax status` = round(100 * n/sum(n, na.rm = T), 2), 
         ability = str_to_title(ifelse(ability == "yes", "ability", "no ability")), 
         motive = str_to_title(ifelse(motive == "yes", "motive", "no motive")), 
         vax_status = str_to_title(ifelse(vax_status == "vax", "Vaccinated", "Unvaccinated"))) %>% 
  ungroup() %>% 
  arrange(vax_status, desc(n)) %>%
  kable(col.names = 
          c("", "", "", "Count", "Percentage within Vaccination Status"), 
        align = "c",
        caption = "Distribution of forking segments of impediments") %>%
  kable_styling()
Distribution of forking segments of impediments
Count Percentage within Vaccination Status
Unvaccinated Ability No Motive 684 67.99
Unvaccinated No Ability No Motive 260 25.84
Unvaccinated Ability Motive 42 4.17
Unvaccinated No Ability Motive 20 1.99
Vaccinated Ability Motive 733 54.34
Vaccinated Ability No Motive 431 31.95
Vaccinated No Ability Motive 98 7.26
Vaccinated No Ability No Motive 87 6.45

Summary Figures

stacked_bar <- function(df, variable,filter_phrase, fill_label, title_label, caption_label){
  
  variable <- enquo(variable)

  df %>%
    filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
    select(!!variable, vax_status, vax_status_num)  %>%
    group_by(!!variable, vax_status, vax_status_num) %>%
    count() %>%
    group_by(vax_status_num, vax_status) %>%
    mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""), 
           percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent)) %>%
    ggplot(aes( x = vax_status_num, y = n, fill =as.factor(!!variable)))+
    geom_bar(position = "fill", stat ="identity", color = "white", width = .7)  +
    custom_theme +
    scale_x_continuous(breaks = c(0, 1), labels = c("Unvaccinated", "Vaccinated"))+
    scale_fill_manual(values = cb_colors, labels = str_to_title)+
    geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") + 
   labs(x = "", y = "", fill= fill_label, 
         title = title_label,
        caption = caption_label) +
    scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) 
  
  
}
stacked_bar2 <- function(df, variable,filter_phrase, fill_label, title_label, caption_label){
  
  variable <- enquo(variable)
  

  df %>%
    filter(if(!is.null(filter_phrase)) eval(parse(text = filter_phrase)) else TRUE) %>%
    select(!!variable, vax_status, vax_status_num)  %>%
    group_by(!!variable, vax_status, vax_status_num) %>%
    count() %>%
    group_by(vax_status_num, vax_status) %>%
    mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""), 
           percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent), 
           p = rev(cumsum(rev(n/sum(n))))) %>%
    ggplot(aes( x = vax_status_num, y = n, fill =as.factor(!!variable)))+
    geom_bar(position = "fill", stat ="identity", color = "white", width = .7)  +
    custom_theme +
    scale_x_continuous(breaks = c(0, 1), labels = c("Unvaccinated", "Vaccinated"), limits = c(NA, 2.4))+
    scale_fill_manual(values = cb_colors, labels = str_to_title)+
    geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") + 
   labs(x = "", y = "", fill= fill_label, 
         title = title_label,
        caption = caption_label) +
    scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
    geom_text_repel(data = . %>% filter(vax_status == "vax") , aes(x = vax_status_num+0.3, y = p- 0.01,
                                                                 label = str_to_title(!!variable), color = !!variable), 
                  nudge_x = 0.15, direction = "y", hjust = "left", size = 16/.pt) +
    scale_color_manual(values = cb_colors) +
    theme(legend.position = "none")
  
}

First, we show the percentage of participants by vaccination status.

df_v7 %>% 
  select(vax_status) %>%
  group_by(vax_status) %>%
  count() %>%
  ungroup() %>%
  mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = "")) %>%
  ggplot(aes( x = 1, y = n, fill = vax_status))+
  geom_bar(position = "fill", stat ="identity", width = .7, color = "white") +
  geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") + 
  coord_flip() +
  custom_theme +
  scale_x_continuous(breaks = c(1),  limits = c(0.6, 1.8)) + 
  scale_fill_manual(values = c(cb_colors[3], cb_colors[6]), labels = c("Unvax", "Vax")) +
  labs(x = "", y ="", title = "Percentage of Participants by Vaccination Status")+
    geom_label(aes(x = 1.6, y = .29), label = "Vaccinated",  size = 16/.pt, label.size = NA, fill = "white", color = cb_colors[6])+
    geom_label(aes(x = 1.6, y = 0.79), label = "Unvaccinated", label.size = NA, fill = "white", color = cb_colors[3], size = 16/.pt) +
  theme(legend.position = "none", 
        axis.text.y  = element_blank()) +
  scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) 

Demographics by Vaccination Status

This section visualizes various demographic variables by vaccination status.

Gender

df_v7 %>%
  stacked_bar(variable = gender, 
              filter_phrase = NULL, 
              fill_label = "Gender", 
              title_label= "Share of Participants by Gender\nand Vaccination Status", 
              caption_label= "")

Age

df_v7 %>%
  select(age, vax_status) %>%
  ggplot(aes(x = age, group = vax_status,  fill = vax_status)) + 
  geom_density(alpha = 0.5) +
  custom_theme +
  scale_x_continuous(limits = c(0, 100), breaks = seq(0, 100, 10))+ 
  scale_fill_manual(values = c(cb_colors[3], cb_colors[6]))+ 
 labs(x = "Age", y = "Density", fill= "Vax Status", 
      title = paste("Age Distribution by <span style='color:", cb_colors[6], ";'>Vaccinated</span> and <span style='color:", cb_colors[3], ";'>Unvaccinated</span>", sep = "")) +
  theme(legend.position = "none", 
        plot.title = element_markdown(size = 20))

Education

df_v7 %>%
  stacked_bar(variable = education, 
              filter_phrase = NULL, 
              fill_label = "Education", 
              title_label= "Share of Participants by Education\nand Vaccination Status", 
              caption_label= "") 

Religiosity

df_v7 %>%
  stacked_bar(variable = religiosity, 
              filter_phrase = NULL, 
              fill_label = "Religiosity", 
              title_label= "Share of Participants by Religiosity\nand Vaccination Status", 
              caption_label= "")

df_v7 %>%
  stacked_bar(variable = religion, 
              filter_phrase = NULL, 
              fill_label = "Religion", 
              title_label= "Share of Participants by Religion\nand Vaccination Status", 
              caption_label= "")

Income

df_v7 %>%
  stacked_bar(variable = income, 
              filter_phrase = NULL, 
              fill_label = "Income", 
              title_label= "Share of Participants by Income\nand Vaccination Status", 
              caption_label= "")

Location

df_v7 %>%
  stacked_bar(variable = location, 
              filter_phrase = NULL, 
              fill_label = "Location", 
              title_label= "Share of Participants by Location\nand Vaccination Status", 
              caption_label= "")

Ethnicity

df_v7 %>%
  stacked_bar(variable = ethnicity, 
              filter_phrase = NULL, 
              fill_label = "Ethnicity", 
              title_label= "Share of Participants by Ethnicity\nand Vaccination Status", 
              caption_label= "")

Politics

df_v7 %>%
  stacked_bar(variable = politics, 
              filter_phrase = NULL, 
              fill_label = "Politics", 
              title_label= "Share of Participants by Politics\nand Vaccination Status", 
              caption_label= "")

Motivation Impediments

This section visualizes various demographic variables by motivation impediments. The data used in this section are limited to participants who indicated a motive impediment.

Overall

df_v7 %>%
  stacked_bar2(variable = motive_main, 
              filter_phrase = 'motive == "no"', 
              fill_label = "", 
              title_label= "Share of Participants by Motive\nand Vaccination Status", 
              caption_label= "")

Risk

For participants who identified their main motive impediment as risk.

df_v7 %>%
  stacked_bar2(variable = risk_main, 
              filter_phrase = 'motive == "no" & motive_main == "risk"', 
              fill_label = "", 
              title_label= "Share of Participants by Risk\nand Vaccination Status", 
              caption_label= "")

Benefit

For participants who identified their main motive impediment as benefit.

df_v7 %>%
  stacked_bar2(variable = benefit_main, 
              filter_phrase = 'motive == "no" & motive_main == "benefit"', 
              fill_label = "", 
              title_label= "Share of Participants by Benefit\nand Vaccination Status", 
              caption_label= "") 

Ability Impediments

Overall

This section visualizes various demographic variables by ability impediments. The data used in this section are limited to participants who indicated a ability impediment.

df_v7 %>%
  stacked_bar2(variable = ability_main, 
              filter_phrase = 'ability == "no"', 
              fill_label = "", 
              title_label= "Share of Participants by Ability\nand Vaccination Status", 
              caption_label= "")

Availability

For participants who identified their main ability impediment as availability.

df_v7 %>%
  stacked_bar2(variable = availability_main, 
              filter_phrase = 'ability == "no" & ability_main == "availability"', 
              fill_label = "", 
              title_label= "Share of Participants by Availability\nand Vaccination Status", 
              caption_label= "")

Money

For participants who identified their main ability impediment as money.

df_v7 %>%
  stacked_bar2(variable = money_main, 
              filter_phrase = 'ability == "no" & ability_main == "money"', 
              fill_label = "", 
              title_label= "Share of Participants by Money\nand Vaccination Status", 
              caption_label= "")

Time

For participants who identified their main ability impediment as time.

df_v7 %>%
  stacked_bar2(variable = time_main, 
              filter_phrase = 'ability == "no" & ability_main == "time"', 
              fill_label = "", 
              title_label= "Share of Participants by Time\nand Vaccination Status", 
              caption_label= "")

Best Treatment

This figure shows the percentage of participants by best treatment.

df_v7 %>%
  stacked_bar2(variable = best_treatment, 
              filter_phrase = NULL, 
              fill_label = "Best Treatment", 
              title_label= "Percentage of Participants by Best Treatment\nand Vaccination Status", 
              caption_label= "")

Unvaccinated Participants

This section visualizas demographics by best treatment, motivation impediments, and ability impediments for the unvaccinated participants.

Demographics by Best Treatment, for Unvaccinated Participants

The following figures show the best treatment by various demographics, for unvaccinated participants. Additionally, observations with missing entries for a best treatment are removed. Grey bars show the percentage of missing observations.

stacked_bar3 <- function(df, variable, fill_label, title_label, caption_label){
  
  variable <- enquo(variable)
  
  df %>%
  filter(vax_status == "unvax" & !is.na(best_treatment)) %>%
  select(best_treatment, !!variable) %>%
  group_by(!!variable, best_treatment) %>%
  count() %>%
  group_by(best_treatment) %>%
    mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""), 
           percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent), 
           p = rev(cumsum(rev(n/sum(n)))), 
           best_treatment_num = as.numeric(best_treatment)) %>%
    ggplot(aes( x = best_treatment_num, y = n, fill =!!variable))+
    geom_bar(position = "fill", stat ="identity", color = "white", width = .7)  +
    custom_theme +
    scale_fill_manual(values = cb_colors, labels = str_to_title)+
    scale_x_continuous(limits = c(NA, 8), breaks = 1:6, labels =str_replace_all(c(levels(df_v7$best_treatment)), " ", "\n")) +
    geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") + 
   labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
    scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
  geom_text_repel(data = . %>% filter(best_treatment == "other") , 
                  aes(x = best_treatment_num+0.3, y = p-0.01,
                                                                 label = str_to_title(!!variable), color = !!variable), 
                  nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
    scale_color_manual(values = cb_colors) +
    theme(legend.position = "none")
}

Gender

stacked_bar3(df = df_v7, 
             variable = gender, 
             fill_label ="", 
             title_label = "Gender by Best Treatment", caption_label = "")

Ethnicity

stacked_bar3(df = df_v7, 
             variable = ethnicity, 
             fill_label ="", 
             title_label = "Ethnicity by Best Treatment", caption_label = "")

Education

stacked_bar3(df = df_v7, 
             variable = education, 
             fill_label ="", 
             title_label = "Education by Best Treatment", caption_label = "")

Location

stacked_bar3(df = df_v7, 
             variable = location, 
             fill_label ="", 
             title_label = "Location by Best Treatment", caption_label = "")

Politics

stacked_bar3(df = df_v7, 
             variable = politics, 
             fill_label ="", 
             title_label = "Politics by Best Treatment", caption_label = "")

Religiosity

stacked_bar3(df = df_v7, 
             variable = religiosity, 
             fill_label ="", 
             title_label = "Religiosity by Best Treatment", caption_label = "")

Best Treatment by Demographics, for Unvaccinated Participants

The following figures show the best treatment by various demographics, for the unvaccinated group. Additionally, observations with missing entries for a given demographic variable are removed. Grey bars show the percentage of missing observations.

stacked_bar4 <- function(df, variable, fill_label, title_label, caption_label){

    variable <- enquo(variable)
  
  df %>%
  filter(vax_status == "unvax" & !is.na(!!variable)) %>%
  select(best_treatment, !!variable) %>%
  group_by(!!variable, best_treatment) %>%
  count() %>%
  group_by(!!variable) %>%
    mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""), 
           percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent), 
           p = rev(cumsum(rev(n/sum(n)))), 
           demo_num = as.numeric(!!variable)) %>%
    ggplot(aes( x = demo_num, y = n, fill =best_treatment))+
    geom_bar(position = "fill", stat ="identity", color = "white", width = .7)  +
    custom_theme +
    scale_fill_manual(values = cb_colors, labels = str_to_title)+
    scale_x_continuous(breaks = 1:length(levels(df_v7 %>% pull(!!variable))),
      limits = c(NA, length(levels(df_v7 %>% pull(!!variable))) +2), 
      labels =str_replace_all(c(levels(df_v7 %>% pull(!!variable))), " ", "\n"))+
    geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") + 
  labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
    scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
  geom_text_repel(data = . %>% ungroup() %>%filter(demo_num == max(demo_num)) , 
                  aes(x = demo_num+0.3, y = p-0.01,
                                                                 label = str_to_title(best_treatment), color = best_treatment), 
                 nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
    scale_color_manual(values = cb_colors) +
    theme(legend.position = "none")
  
}

Gender

stacked_bar4(df = df_v7, variable = gender, fill_label = "", title_label = "Best Treatment by Gender", caption_label = "")

Ethnicity

stacked_bar4(df = df_v7, variable = ethnicity, fill_label = "", title_label = "Best Treatment by Ethnicity", caption_label = "")

Education

stacked_bar4(df = df_v7, variable = education, fill_label = "", title_label = "Best Treatment by Education", caption_label = "")

Religiosity

stacked_bar4(df = df_v7, variable = religiosity, fill_label = "", title_label = "Best Treatment by Religiosity", caption_label = "")

Politics

stacked_bar4(df = df_v7, variable = politics, fill_label = "", title_label = "Best Treatment by Politics", caption_label = "")

Location

stacked_bar4(df = df_v7, variable = location, fill_label = "", title_label = "Best Treatment by Location", caption_label = "")

Demographics by Motive Impediment, for Unvaccinated Participants

The following figures show the best treatment by various demographics, for unvaccinated participants. Additionally, observations with missing entries for a best treatment are removed. Grey bars show the percentage of missing observations.

stacked_bar6 <- function(df, variable, fill_label, title_label, caption_label){
  
  variable <- enquo(variable)
  
  df %>%
  filter(vax_status == "unvax" & !is.na(motive_main)) %>%
  select(motive_main, !!variable) %>%
  group_by(!!variable, motive_main) %>%
  count() %>%
  group_by(motive_main) %>%
    mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""), 
           percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent), 
           p = rev(cumsum(rev(n/sum(n)))), 
           motive_main_num = as.numeric(motive_main)) %>%
    ggplot(aes( x = motive_main_num, y = n, fill =!!variable))+
    geom_bar(position = "fill", stat ="identity", color = "white", width = .7)  +
    custom_theme +
    scale_fill_manual(values = cb_colors, labels = str_to_title)+
    scale_x_continuous(limits = c(NA, length(levels(df_v7$motive_main)) + 2), breaks = 1:length(levels(df_v7$motive_main)), 
                       labels =str_replace_all(c(levels(df_v7$motive_main)), " ", "\n")) +
    geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") + 
   labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
    scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
  geom_text_repel(data = . %>% filter(motive_main == "other") , 
                  aes(x = motive_main_num+0.3, y = p-0.01,
                                                                 label = str_to_title(!!variable), color = !!variable), 
                  nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
    scale_color_manual(values = cb_colors) +
    theme(legend.position = "none")
}

Gender

stacked_bar6(df = df_v7, variable = gender, caption_label = "", 
           title_label = "Motive Impediment by Gender", fill_label = "")

Ethnicity

stacked_bar6(df = df_v7, variable = ethnicity, caption_label = "", 
           fill_label = "", title_label = "Motive Impediment by Ethnicity")

Education

stacked_bar6(df = df_v7, variable = education, caption_label = "", 
           fill_label = "", title_label = "Motive Impediment by Education")

Religiosity

stacked_bar6(df = df_v7, variable = religiosity, caption_label = "", 
           fill_label = "", title_label = "Motive Impediment by Religiosity")

Politics

stacked_bar6(df = df_v7, variable = politics, caption_label = "", 
           fill_label = "", title_label = "Motive Impediment by Politics")

Location

stacked_bar6(df = df_v7, variable = location, caption_label = "", 
           fill_label = "", title_label = "Motive Impediment by Location")

Motivation Impediments By Demographics, for Unvaccinated Participants

Data used in this section is limited to unvaccinated participants who indicataed a motivation impediment.

stacked_bar5 <- function(df, variable, fill_label, title_label, caption_label){

    variable <- enquo(variable)
  
  df %>%
  filter(vax_status == "unvax" & !is.na(!!variable) & motive == "no") %>%
  select(motive_main, !!variable) %>%
  group_by(!!variable, motive_main) %>%
  count() %>%
  group_by(!!variable) %>%
    mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""), 
           percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent), 
           p = rev(cumsum(rev(n/sum(n)))), 
           demo_num = as.numeric(!!variable)) %>%
    ggplot(aes( x = demo_num, y = n, fill =motive_main))+
    geom_bar(position = "fill", stat ="identity", color = "white", width = .7)  +
    custom_theme +
    scale_fill_manual(values = cb_colors, labels = str_to_title)+
    scale_x_continuous(breaks = 1:length(levels(df_v7 %>% pull(!!variable))),
      limits = c(NA, length(levels(df_v7 %>% pull(!!variable))) +2), 
      labels =str_replace_all(c(levels(df_v7 %>% pull(!!variable))), " ", "\n"))+
    geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") + 
  labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
    scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
  geom_text_repel(data = . %>% ungroup() %>%filter(demo_num == max(demo_num)) , 
                  aes(x = demo_num+0.3, y = p-0.01,
                                                                 label = str_to_title(motive_main), color = motive_main), 
                 nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
    scale_color_manual(values = cb_colors) +
    theme(legend.position = "none")
  
}

Gender

stacked_bar5(df_v7, variable = gender, fill_label = "", caption_label = "", title_label = "Motive Impediment by Gender")

Ethnicity

stacked_bar5(df_v7, variable = ethnicity, fill_label = "", caption_label = "", title_label = "Motive Impediment by Ethnicity")

Education

stacked_bar5(df_v7, variable = education, fill_label = "", caption_label = "", title_label = "Motive Impediment by Education")

Religiosity

stacked_bar5(df_v7, variable = religiosity, fill_label = "", caption_label = "", title_label = "Motive Impediment by Religiosity")

Location

stacked_bar5(df_v7, variable = location, fill_label = "", caption_label = "", title_label = "Motive Impediment by Location")

Politics

stacked_bar5(df_v7, variable = politics, fill_label = "", caption_label = "", title_label = "Motive Impediment by Politics")

Demographics by Ability Impediment, for Unvaccinated Participants

The following figures show the best treatment by various demographics, for unvaccinated participants. Additionally, observations with missing entries for a best treatment are removed. Grey bars show the percentage of missing observations.

stacked_bar6 <- function(df, variable, fill_label, title_label, caption_label){
  
  variable <- enquo(variable)
  
  df %>%
  filter(vax_status == "unvax" & !is.na(ability_main)) %>%
  select(ability_main, !!variable) %>%
  group_by(!!variable, ability_main) %>%
  count() %>%
  group_by(ability_main) %>%
    mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""), 
           percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent), 
           p = rev(cumsum(rev(n/sum(n)))), 
           ability_main_num = as.numeric(ability_main)) %>%
    ggplot(aes( x = ability_main_num, y = n, fill =!!variable))+
    geom_bar(position = "fill", stat ="identity", color = "white", width = .7)  +
    custom_theme +
    scale_fill_manual(values = cb_colors, labels = str_to_title)+
    scale_x_continuous(limits = c(NA, length(levels(df_v7$ability_main)) + 2), breaks = 1:length(levels(df_v7$ability_main)), 
                       labels =str_replace_all(c(levels(df_v7$ability_main)), " ", "\n")) +
    geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") + 
   labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
    scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
  geom_text_repel(data = . %>% filter(ability_main == "other") , 
                  aes(x = ability_main_num+0.3, y = p-0.01,
                                                                 label = str_to_title(!!variable), color = !!variable), 
                  nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
    scale_color_manual(values = cb_colors) +
    theme(legend.position = "none")
}

Gender

stacked_bar6(df = df_v7, variable = gender, caption_label = "", 
           title_label = "Ability Impediment by Gender", fill_label = "")

Ethnicity

stacked_bar6(df = df_v7, variable = ethnicity, caption_label = "", 
           fill_label = "", title_label = "Ability Impediment by Ethnicity")

Education

stacked_bar6(df = df_v7, variable = education, caption_label = "", 
           fill_label = "", title_label = "Ability Impediment by Education")

Religiosity

stacked_bar6(df = df_v7, variable = religiosity, caption_label = "", 
           fill_label = "", title_label = "Ability Impediment by Religiosity")

Politics

stacked_bar6(df = df_v7, variable = politics, caption_label = "", 
           fill_label = "", title_label = "Ability Impediment by Politics")

Location

stacked_bar6(df = df_v7, variable = location, caption_label = "", 
           fill_label = "", title_label = "Ability Impediment by Location")

Ability Impediments By Demographics, for Unvaccinated Participants

The data in these visualizations is limited to unvaccinated participants who indicated an ability impediment.

stacked_bar5 <- function(df, variable, fill_label, title_label, caption_label){

    variable <- enquo(variable)
  
  df %>%
  filter(vax_status == "unvax" & !is.na(!!variable) & ability == "no") %>%
  select(ability_main, !!variable) %>%
  group_by(!!variable, ability_main) %>%
  count() %>%
  group_by(!!variable) %>%
    mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = ""), 
           percent = ifelse(round(n/sum(n) * 100, 0) < 6, "", percent), 
           p = rev(cumsum(rev(n/sum(n)))), 
           demo_num = as.numeric(!!variable)) %>%
    ggplot(aes( x = demo_num, y = n, fill =ability_main))+
    geom_bar(position = "fill", stat ="identity", color = "white", width = .7)  +
    custom_theme +
    scale_fill_manual(values = cb_colors, labels = str_to_title)+
    scale_x_continuous(breaks = 1:length(levels(df_v7 %>% pull(!!variable))),
      limits = c(NA, length(levels(df_v7 %>% pull(!!variable))) +2), 
      labels =str_replace_all(c(levels(df_v7 %>% pull(!!variable))), " ", "\n"))+
    geom_text(aes( label = percent), color = "white", position = position_fill(vjust = 0.5), size = 16/.pt, fontface = "bold") + 
  labs(x = "", y = "", fill = fill_label, title = title_label, caption = caption_label) +
    scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%")) +
  geom_text_repel(data = . %>% ungroup() %>%filter(demo_num == max(demo_num)) , 
                  aes(x = demo_num+0.3, y = p-0.01,
                                                                 label = str_to_title(ability_main), color = ability_main), 
                 nudge_x = 0.3, direction = "y", hjust = "left", size = 16/.pt)+
    scale_color_manual(values = cb_colors) +
    theme(legend.position = "none")
  
}

Gender

stacked_bar5(df_v7, variable = gender,
             fill_label = "Ability Impediments by Gender", 
             title_label = "", 
             caption_label = "")

Ethnicity

stacked_bar5(df_v7, variable = ethnicity, 
             fill_label = "", title_label = "Ability Impediments by Ethnicity", 
             caption_label = "")

Education

stacked_bar5(df_v7, variable = education, 
             fill_label = "", 
             title_label = "Ability Impediments by Education", 
             caption_label = "")

Religiosity

stacked_bar5(df_v7, variable = religiosity, 
             fill_label = "", 
             title_label = "Ability Impediments by Religiosity",
             caption_label = "")

Location

stacked_bar5(df_v7, variable = location, 
             fill_label = "", title_label = "Ability Impediments by Location",
             caption_label = "")

Politics

stacked_bar5(df_v7, variable = politics, fill_label = "", 
             title_label = "Ability Impediments by Politics", caption_label = "")