## Load packages
pacman::p_load(DT, estimatr, kableExtra, readr, reshape2, tidyverse, xtable, dataMaid, ggcorrplot, ggmap, rpart, rpart.plot, pollster, wordcloud, tm, RColorBrewer, hrbrthemes, janitor, purrr, gridExtra, cowplot, rcompanion, nnet, texreg, compareGroups, caret, randomForest)
set.seed(94305)## Data Handling
### read ads data
ads_v6 <-
read.csv("data/old/ads_data_v6.csv") %>%
rename(original_ref = Ad.ID, ad_name = Ad.name) %>%
# group_by(ad_name) %>%
# summarise_at(vars(Impressions, Link.clicks, Results, `Amount.spent..USD.`), ~ sum(., na.rm = TRUE)) %>%
remove_empty() %>%
inner_join(
read_csv(here::here("data/Pilot_creative_text.xlsx - Pilot_5.csv")) %>% rename(ad_name = Ad),
by = "ad_name"
) %>%
relocate(original_ref, ad_name)
ads_v5 <-
read.csv("data/old/ads_data_v5.csv") %>%
rename(original_ref = Ad.ID, ad_name = Ad.name) %>%
remove_empty() %>%
inner_join(
read_csv(here::here("data/Pilot_creative_text.xlsx - Pilot_5.csv")) %>% rename(ad_name = Ad),
by = "ad_name"
) %>%
relocate(original_ref, ad_name)
# ads_v4b <- read.csv("data/old/ads_data_v4b.csv")
# ads_v4a <- read.csv("data/old/ads_data_v4a.csv")
# ads %>% glimpse()
form_percent <- function(dec){
return(paste0(round(dec* 100, 1), "%"))
}
form_cost <- function(num){
if (num == 0) {
return(NA_character_)
} else {
return(paste0("$", round(cost / num, 3)))
}
}
# sum(ads_v6$Impressions)# read in CURRENT survey data
df_full_v6 <-
read_csv("data/latest/Share_Your_Voice_v6_ALP.csv") %>%
clean_names() %>%
mutate_if(is.character, ~ str_replace_all(., '[\n\t]', '')) %>%
mutate(
first_name = if_else(is.na(first_name), "", first_name),
middle_name = if_else(is.na(middle_name), "", middle_name),
last_name = if_else(is.na(last_name), "", last_name),
full_name = str_c(first_name, middle_name, last_name, sep = " "),
full_name_short = str_c(first_name, last_name, sep = " "),
date = lubridate::date(signed_up)
) %>%
filter(!(full_name %in% c("Robert Kuan", "James Li", "Kaylin Rochford", "Saurabh Khanna", "Dingchen Sha", "Kristine Koutout", "Susan Athey", "Dean Karlan"))) %>%
filter(date > "2022-03-23", !is.na(version)) %>%
mutate(original_ref = parse_number(original_ref)) %>%
mutate(
motive = if_else(str_detect(motive, "yes"), "yes", "no"),
motive_main = if_else(str_detect(motive_main, "risk"), "risk", motive_main)
) %>%
left_join(ads_v6, by = "original_ref") %>%
remove_empty()
# filtering for current wave (main data for this wave)
df_v6 <-
df_full_v6 %>%
filter(version == "pilot_6", full_complete == "yes") %>%
drop_na(vax_status) %>%
remove_empty()
# removing duplicates (keep first response per phone no.)
df_v6 <-
df_v6 %>%
mutate(
phone_number = str_replace_all(phone_number, " ", ""),
phone_number = str_replace_all(phone_number, "-", ""),
) %>%
arrange(phone_number, last_seen) %>%
distinct(phone_number, .keep_all = T) %>%
remove_empty()
## OLD WAVES
# add full data for future wave
# place code from chunk above
df_full_v5 <-
read_csv("data/latest/Share_Your_Voice_2022_02_22_03_35_54.csv") %>%
clean_names() %>% remove_empty() %>%
mutate_if(is.character, ~ str_replace_all(., '[\n\t]', '')) %>%
mutate(
full_name = str_c(first_name, last_name, sep = " "),
date = lubridate::date(signed_up)
) %>%
filter(!(full_name %in% c("Robert Kuan", "James Li", "Kaylin Rochford", "Saurabh Khanna", "Dingchen Sha"))) %>%
filter(date > "2022-02-18", !is.na(version)) %>%
select(-contains("name"), -profile_pic_url, -messenger_user_id) %>%
mutate(original_ref = parse_number(original_ref)) %>%
remove_empty()
df_v5 <-
df_full_v5 %>%
filter(version == "pilot_5", full_complete == "yes") %>%
drop_na(vax_status) %>%
mutate(
phone_number = str_replace_all(phone_number, " ", ""),
phone_number = str_replace_all(phone_number, "-", ""),
) %>%
arrange(phone_number, last_seen) %>%
distinct(phone_number, .keep_all = T) %>%
remove_empty()
# combining chatfuel data with ads data. we use `df` in most analyses below.
df <-
bind_rows(
df_v6 %>% left_join(ads_v6, by = "original_ref"),
df_v5 %>% left_join(ads_v5, by = "original_ref"),
) %>%
remove_empty()
# combining full datatsets
df_full <-
bind_rows(
df_full_v6 %>% left_join(ads_v6, by = "original_ref"),
df_full_v5 %>% left_join(ads_v5, by = "original_ref")
) %>%
remove_empty()
ads <- bind_rows(ads_v5, ads_v6) %>% remove_empty()
## Keep pilot 4b for some graphs as well
# full data for 4b wave
df_full_v4b <-
read_csv(here::here("data/Social_Impact_Research_Initiative_2022_02_04_17_40_24.csv")) %>%
clean_names() %>%
mutate_if(is.character, ~ str_replace_all(., '[\n\t]', '')) %>%
mutate(
full_name = str_c(first_name, last_name, sep = " ")
) %>%
filter(!(full_name %in% c("Robert Kuan", "James Li", "Kaylin Rochford", "Saurabh Khanna", "Dingchen Sha"))) %>%
select(-contains("name"), -profile_pic_url, -messenger_user_id) %>%
mutate(original_ref = parse_number(original_ref)) %>%
remove_empty()
# full data for 4a wave
df_full_v4a <-
read_csv(here::here("data/old/Share_Your_Voice_2022_01_18_07_51_30.csv")) %>%
clean_names() %>%
mutate_if(is.character, ~ str_replace_all(., '[\n\t]', '')) %>%
mutate(
full_name = str_c(first_name, last_name, sep = " "),
treatment_complete = if_else(vax_status == "unvax" & vax_future == "no way!", "yes", treatment_complete) # pilot v4 error corrected here
) %>%
filter(!(full_name %in% c("Robert Kuan", "James Li", "Kaylin Rochford", "Saurabh Khanna", "Dingchen Sha"))) %>%
select(-contains("name"), -profile_pic_url, -messenger_user_id) %>%
filter(ad_set %in% c("pilot_v4_1", "pilot_v4_2", "pilot_v4_3")) %>%
mutate(version = "pilot_4a") %>%
remove_empty()
# for the free text charts
df_text <-
bind_rows(
df_full_v5 %>% select(-original_ref),
df_full_v4a %>% select(-original_ref),
df_full_v4b %>% select(-original_ref)
) %>%
filter(full_complete == "yes", vax_status == "unvax") %>%
remove_empty()
# merging cleaned free text
motive_df <-
readxl::read_excel(here::here("data/freetext/free_text_responses_tillv5.xlsx"), sheet = "motive_other") %>%
select(1:2) %>%
remove_empty()
ability_df <-
readxl::read_excel(here::here("data/freetext/free_text_responses_tillv5.xlsx"), sheet = "ability_other") %>%
select(1:2) %>%
remove_empty()
treatment_df <-
readxl::read_excel(here::here("data/freetext/free_text_responses_tillv5.xlsx"), sheet = "best_treatment_other") %>%
select(1:2) %>%
remove_empty()
df_text <-
df_text %>%
left_join(motive_df) %>%
left_join(ability_df) %>%
left_join(treatment_df)# clean up demographic variables
clean_up_demog <- function(df){
df$gender[!df$gender %in% c("male", "female", NA_character_)] <- "other"
df$ethnicity[!df$ethnicity %in% c("black or african", "coloured", "white or caucasian", "prefer not to say", "asian or indian", NA_character_)] <- "other"
df$income[!df$income %in% c("< R5,000", "R5,000 – R9,999", "R10,000 – R29,999", "R30,000 – R49,999", "R50,000 – R99,999", "> R100,000", "prefer not to say", NA_character_)] <- "other"
df$education[!df$education %in% c("< high school", "high school", "some college", "2-year degree", "4-year degree", "graduate degree", "prefer not to say", NA_character_)] <- "other"
df$politics[!df$politics %in% c("conservative", "moderate", "liberal", "prefer not to say", NA_character_)] <- "other"
df$location[!df$location %in% c("urban", "suburban", "rural", "prefer not to say", NA_character_)] <- "other"
df$religion[grep("christ", tolower(df$religion))] <- "christian"
df$religion[!df$religion %in% c("christian", "african traditional", "islam", "hinduism", "no religion", "prefer not to say", NA_character_)] <- "other"
return(df)
}
df <- clean_up_demog(df)Background
COVID-19 vaccine hesitancy has been recognized as a problem across nations. A resistance to getting vaccinated is emerging as a major hurdle, especially in the developing world, where vaccine access issues are still being gradually resolved. Persistent pools of unvaccinated people around the world could present a greater risk for the emergence of new variants of concern. Addressing people’s vaccine hesitancy is hence crucial to curb the spread of COVID-19, and to consequently avert hospitalizations and death.
Objectives
We intend to understand why people are hesitant about getting the COVID-19 vaccine. Hesitancy could not only occur within the unvaccinated population but also in a subset of people who already got vaccinated. Therefore, the first phase of our project has the following objectives:
Approach
We intend to use chatbot as a medium (on Facebook) to conduct conversations with people and understand how we can best achieve the above three objectives. We have run six pilots as of March 2022 – 2 in the United States using Lucid, and 4 in South Africa on Facebook. Our eventual goal will be running this using an interactive, personalized chatbot that enables the conversation to flow more naturally than in a survey format. We are running the pilots in order to – 1) achieve technical proofs of concept, 2) reduce participant recruitment & completion costs (survey completion of unvaccinated participants open to treatment ) before experiment launch, 3) improve chatbot script/forking/engagement before experiment launch, and 4) gather exploratory ideas for impactful covariates and treatments. Our insights from these pilots are detailed ahead.
Before starting with our key takeaways from previous pilots, here is a glossary of different detailed analyses scripts that we have generated so far:
The current analysis uses data from 3456 respondents combined across our two largest pilots – pilots 5 (1621 respondents in Feb 2022) and 6 (1835 respondents in March 2022).
Analysis script used for generating these results is linked here. Related GitHub issue is linked here.
We have split our key takeaways into 5 sections:
Vaccinated people can die being a frequent free text
response we do not have]. Around 24% responses in the no-motivation fork
and 19% responses in the no-ability fork are redundant. Mis-forking
(i.e. being assigned to the wrong fork) is negligible for the motivation
fork, but at 10% for the ability fork (i.e. 10% respondents here should
be in the motivation fork).1. We see a shift in the percentage of vaccinated people among respondents (38% in pilot 4 in January –> 62% in the latest pilot 6), reflecting South Africa’s current situation after the Omicron wave.
bind_rows(
df_full_v4b %>% filter(full_complete == "yes"),
df_v5,
df_v6
) %>%
transmute(
version,
vaxxed = (vax_status == "vax")
) %>%
group_by(version) %>%
summarise(
vax_mean = mean(vaxxed, na.rm = T),
vax_sd = sd(vaxxed, na.rm = T),
vax_n = sum(!is.na(vaxxed)),
vax_se = vax_sd / sqrt(vax_n)
) %>%
mutate(
confint_low = vax_mean - 1.96*vax_se,
confint_high = vax_mean + 1.96*vax_se,
) %>%
ggplot() +
geom_linerange(aes(x = version, ymin = confint_low, ymax = confint_high), width = 0.5) +
geom_point(aes(x = version, y = vax_mean), size = 4, alpha = 0.8, color = "blue") +
geom_hline(yintercept = 0.5, linetype = "dashed") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::breaks_width(0.1)) +
theme_bw() +
labs(
y = "% Vaccinated",
title = "Rising vaccination status in South Africa (Jan'22 → March'22)",
subtitle = "Error bars represent 95% confidence intervals"
)2. Our sample demopgraphic is dominated by individuals who are black/african (80%), females (60%), have education till high school (60%), and earn less than 5000 ZAR (315 USD) per year (43%).
The table below provides an overview of relevant demographic variables in our sample. Roughly, we have:
df %>%
mutate_if(is_character, ~ as_factor(.)) %>%
select(contains(c("gender", "income", "ethnicity", "education", "religion", "politics", "location"))) %>%
select(-cv_gender) %>%
clean_names(case = "title") %>%
papeR::summarize_factor()Here are stacked bar charts representing different demographic variables:
df %>%
mutate_if(is_character, ~ as_factor(.)) %>%
select(contains(c("gender", "income", "ethnicity", "education", "religion", "politics", "location"))) %>%
select(-cv_gender) %>%
clean_names(case = "title") %>%
papeR::summarize_factor() %>%
clean_names() %>%
transmute(x = na_if(x, ""), level, percent = parse_double(percent)/100) %>%
fill(x) %>%
filter(x == "Gender") %>%
ggplot(aes(x, percent)) +
geom_col(aes(fill = level), color = "black") +
theme_minimal() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::breaks_width(0.1)) +
coord_flip() +
labs(
x = "", y = "", fill = ""
)df %>%
mutate_if(is_character, ~ as_factor(.)) %>%
select(contains(c("gender", "income", "ethnicity", "education", "religion", "politics", "location"))) %>%
select(-cv_gender) %>%
clean_names(case = "title") %>%
papeR::summarize_factor() %>%
clean_names() %>%
transmute(x = na_if(x, ""), level, percent = parse_double(percent)/100) %>%
fill(x) %>%
filter(x == "Education") %>%
ggplot(aes(x, percent)) +
geom_col(aes(fill = level), color = "black") +
theme_minimal() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::breaks_width(0.1)) +
coord_flip() +
labs(
x = "", y = "", fill = ""
)df %>%
mutate_if(is_character, ~ as_factor(.)) %>%
select(contains(c("gender", "income", "ethnicity", "education", "religion", "politics", "location"))) %>%
select(-cv_gender) %>%
clean_names(case = "title") %>%
papeR::summarize_factor() %>%
clean_names() %>%
transmute(x = na_if(x, ""), level, percent = parse_double(percent)/100) %>%
fill(x) %>% #count(x)
filter(x == "Ethnicity") %>%
ggplot(aes(x, percent)) +
geom_col(aes(fill = level), color = "black") +
theme_minimal() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::breaks_width(0.1)) +
coord_flip() +
labs(
x = "", y = "", fill = ""
)df %>%
mutate_if(is_character, ~ as_factor(.)) %>%
select(contains(c("gender", "income", "ethnicity", "education", "religion", "politics", "location"))) %>%
select(-cv_gender) %>%
clean_names(case = "title") %>%
papeR::summarize_factor() %>%
clean_names() %>%
transmute(x = na_if(x, ""), level, percent = parse_double(percent)/100) %>%
fill(x) %>% #count(x)
filter(x == "Income") %>%
ggplot(aes(x, percent)) +
geom_col(aes(fill = level), color = "black") +
theme_minimal() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::breaks_width(0.1)) +
coord_flip() +
labs(
x = "", y = "", fill = ""
)df %>%
mutate_if(is_character, ~ as_factor(.)) %>%
select(contains(c("gender", "income", "ethnicity", "education", "religion", "politics", "location"))) %>%
select(-cv_gender) %>%
clean_names(case = "title") %>%
papeR::summarize_factor() %>%
clean_names() %>%
transmute(x = na_if(x, ""), level, percent = parse_double(percent)/100) %>%
fill(x) %>% #count(x)
filter(x == "Religion") %>%
ggplot(aes(x, percent)) +
geom_col(aes(fill = level), color = "black") +
theme_minimal() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::breaks_width(0.1)) +
coord_flip() +
labs(
x = "", y = "", fill = ""
)df %>%
mutate_if(is_character, ~ as_factor(.)) %>%
select(contains(c("gender", "income", "ethnicity", "education", "religion", "politics", "location"))) %>%
select(-cv_gender) %>%
clean_names(case = "title") %>%
papeR::summarize_factor() %>%
clean_names() %>%
transmute(x = na_if(x, ""), level, percent = parse_double(percent)/100) %>%
fill(x) %>% #count(x)
filter(x == "Politics") %>%
ggplot(aes(x, percent)) +
geom_col(aes(fill = level), color = "black") +
theme_minimal() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::breaks_width(0.1)) +
coord_flip() +
labs(
x = "", y = "", fill = ""
)df %>%
mutate_if(is_character, ~ as_factor(.)) %>%
select(contains(c("gender", "income", "ethnicity", "education", "religion", "politics", "location"))) %>%
select(-cv_gender) %>%
clean_names(case = "title") %>%
papeR::summarize_factor() %>%
clean_names() %>%
transmute(x = na_if(x, ""), level, percent = parse_double(percent)/100) %>%
fill(x) %>% #count(x)
filter(x == "Location") %>%
ggplot(aes(x, percent)) +
geom_col(aes(fill = level), color = "black") +
theme_minimal() +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::breaks_width(0.1)) +
coord_flip() +
labs(
x = "", y = "", fill = ""
)Here is a comparison of vaccinated and unvaccinated individuals by demographic. Unvaccinated sample includes more males than vaccinated sample, and is less likely to disclose their income.
df %>%
mutate_if(is_character, ~ as_factor(.)) %>%
select(vax_status, contains(c("gender", "income", "ethnicity", "education", "religion", "politics", "location"))) %>%
select(-cv_gender) %>%
compareGroups(vax_status ~ ., data = .) %>%
createTable(show.all = F, show.p.overall = T)##
## --------Summary descriptives table by 'vax_status'---------
##
## ___________________________________________________________
## unvax vax p.overall
## N=1361 N=2095
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## gender: 0.003
## female 760 (56.1%) 1278 (61.3%)
## male 594 (43.9%) 808 (38.7%)
## income: <0.001
## < R5,000 519 (38.1%) 965 (46.1%)
## other 43 (3.16%) 127 (6.06%)
## prefer not to say 393 (28.9%) 405 (19.3%)
## R5,000 – R9,999 152 (11.2%) 260 (12.4%)
## R50,000 – R99,999 46 (3.38%) 47 (2.24%)
## > R100,000 63 (4.63%) 68 (3.25%)
## R10,000 – R29,999 91 (6.69%) 171 (8.16%)
## R30,000 – R49,999 54 (3.97%) 52 (2.48%)
## ethnicity: <0.001
## black or african 1065 (78.3%) 1666 (79.6%)
## coloured 113 (8.30%) 144 (6.88%)
## other 41 (3.01%) 102 (4.87%)
## prefer not to say 50 (3.67%) 31 (1.48%)
## white or caucasian 81 (5.95%) 114 (5.44%)
## asian or indian 11 (0.81%) 37 (1.77%)
## education: <0.001
## high school 528 (38.8%) 768 (36.7%)
## < high school 302 (22.2%) 569 (27.2%)
## other 29 (2.13%) 89 (4.25%)
## some college 281 (20.7%) 400 (19.1%)
## 2-year degree 50 (3.68%) 52 (2.48%)
## prefer not to say 77 (5.66%) 77 (3.68%)
## graduate degree 56 (4.12%) 78 (3.72%)
## 4-year degree 37 (2.72%) 62 (2.96%)
## religion: <0.001
## christian 1008 (74.1%) 1654 (78.9%)
## african traditional 88 (6.47%) 125 (5.97%)
## prefer not to say 73 (5.36%) 69 (3.29%)
## other 56 (4.11%) 77 (3.68%)
## islam 39 (2.87%) 55 (2.63%)
## no religion 93 (6.83%) 91 (4.34%)
## hinduism 4 (0.29%) 24 (1.15%)
## politics: 0.005
## prefer not to say 679 (49.9%) 929 (44.3%)
## conservative 179 (13.2%) 347 (16.6%)
## liberal 148 (10.9%) 225 (10.7%)
## moderate 337 (24.8%) 553 (26.4%)
## other 17 (1.25%) 41 (1.96%)
## location: <0.001
## urban 467 (34.3%) 726 (34.7%)
## rural 508 (37.3%) 812 (38.8%)
## prefer not to say 94 (6.91%) 88 (4.20%)
## suburban 287 (21.1%) 434 (20.7%)
## other 5 (0.37%) 34 (1.62%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
3. Associations among demographic variables are low, except for a moderate association between education and income.
The correlation plot among just demographic variables can be seen below. We map binary and ordinal variables to continuous variables as follows:
female: 1 if female, 0 if maleincome: 0 if the participant is unemployed, 1 if
household income < R5,000, 2 if household income in R5,000 – R9,999,
…, 6 if household income > R100,000education: 1 if the participant’s education < high
school, 2 if education is high school, …, 6 if education is a graduate
degreereligiosity: 1 if the participant is not very
religious, 2 if somewhat religious, 3 if very religiouspolitics: 1 if the participant is conservative, 2 if
moderate, 3 if liberallocation: 1 if the participant lives in rural, 2 if
suburban, 3 if urban,white: 1 if the participant is a white or caucasian, 0
if notWeak correlations here weed out multicollinearity concerns when using these variables together in logit models.
df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
ability = case_when(
ability == "no" ~ 0,
ability == "yes" ~ 1,
),
motive = case_when(
motive == "no" ~ 0,
motive == "yes" ~ 1,
),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L)
) %>%
select("female", "income", "education", "religiosity", "politics", "location", "white", "unvax")
ggcorrplot(cor(df_mc_numeric, use = "pairwise.complete.obs"), type = "lower", lab = TRUE, lab_size = 3, tl.cex = 10) +
ggtitle("Correlation Matrix (demographic variables only)")1. Within unvaccinated respondents, a majority lack motivation to get the vaccine.
The last column in the table below shows the distribution of impediments within each vaccination status.
For instance, among the unvaccinated respondents:
For instance, among the vaccinated respondents:
df %>%
group_by(vax_status, ability, motive) %>%
summarise(count = n()) %>%
drop_na(ability, motive) %>%
group_by(vax_status) %>%
mutate(`% of vax status` = round(100 * count/sum(count, na.rm = T), 2) %>% str_c(., "%")) %>%
ungroup() %>%
arrange(vax_status, desc(count)) %>%
kable(col.names =
c("Vaccination status", "Able to get vaccine", "Have motivation to get vaccine", "Count", "Percentage of vax status"),
align = "c",
caption = "Distribution of forking segments of participants' impediments") %>%
kable_styling()| Vaccination status | Able to get vaccine | Have motivation to get vaccine | Count | Percentage of vax status |
|---|---|---|---|---|
| unvax | yes | no | 839 | 61.65% |
| unvax | no | no | 245 | 18% |
| unvax | yes | yes | 233 | 17.12% |
| unvax | no | yes | 44 | 3.23% |
| vax | yes | yes | 1151 | 54.97% |
| vax | yes | no | 694 | 33.14% |
| vax | no | yes | 131 | 6.26% |
| vax | no | no | 118 | 5.64% |
2. As vaccination rates rise, motivation impediments shift from “No benefits” to “Risky”.
Note: These trends are purely descriptive though, as these pilots were administered to different participants, and we cannot know if samples in either pilot are largely similar.
df_temp <-
df %>%
transmute(version, motive_main) %>%
drop_na(motive_main) %>%
filter(motive_main != "other") %>%
fastDummies::dummy_cols(select_columns = c("motive_main")) %>%
group_by(version) %>%
summarise(
mean_belief = mean(motive_main_belief, na.rm = T),
sd_belief = sd(motive_main_belief, na.rm = T),
belief_n = sum(!is.na(motive_main_belief)),
se_belief = sd_belief / sqrt(belief_n),
mean_benefit = mean(motive_main_benefit, na.rm = T),
sd_benefit = sd(motive_main_benefit, na.rm = T),
n_benefit = sum(!is.na(motive_main_benefit)),
se_benefit = sd_benefit / sqrt(n_benefit),
mean_risk = mean(motive_main_risk, na.rm = T),
sd_risk = sd(motive_main_risk, na.rm = T),
n_risk = sum(!is.na(motive_main_risk)),
se_risk = sd_risk / sqrt(n_risk)
) %>%
ungroup() %>%
transmute(
version,
mean_belief, mean_benefit, mean_risk,
ci_low_belief = mean_belief - 1.96 * se_belief,
ci_high_belief = mean_belief + 1.96 * se_belief,
ci_low_benefit = mean_benefit - 1.96 * se_benefit,
ci_high_benefit = mean_benefit + 1.96 * se_benefit,
ci_low_risk = mean_risk - 1.96 * se_risk,
ci_high_risk = mean_risk + 1.96 * se_risk
) %>%
pivot_longer(cols = -version)
df_temp %>%
filter(str_detect(name, "mean")) %>%
transmute(version, name = str_sub(name, 6, -1), mean = value) %>%
inner_join(
df_temp %>% filter(str_detect(name, "ci_low")) %>% transmute(version, name = str_sub(name, 8, -1), ci_low = value)
) %>%
inner_join(
df_temp %>% filter(str_detect(name, "ci_high")) %>% transmute(version, name = str_sub(name, 9, -1), ci_high = value)
) %>%
ggplot() +
geom_linerange(aes(x = name, ymin = ci_low, ymax = ci_high), width = 0.5) +
geom_point(aes(x = name, y = mean), size = 4, alpha = 0.8, color = "blue") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::breaks_width(0.1)) +
theme_bw() +
facet_wrap(vars(version)) +
labs(
x = "Impediment",
y = "% respondents",
title = "Impediment shift from 'No benefits' to 'Risky' (Feb'22 → March'22)",
subtitle = "Error bars represent 95% confidence intervals"
)rm(df_temp)3. Within the small set (20%) of respondents with ability impediments across pilots, a majority mention (50%) lack of time.
df %>%
filter(ability_main %in% c("availability", "time", "money")) %>%
transmute(ability_main) %>%
fastDummies::dummy_cols(select_columns = c("ability_main")) %>%
summarise(
mean1 = mean(ability_main_time, na.rm = T),
sd1 = sd(ability_main_time, na.rm = T),
n1 = sum(!is.na(ability_main_time)),
se1 = sd1 / sqrt(n1),
mean2 = mean(ability_main_money, na.rm = T),
sd2 = sd(ability_main_money, na.rm = T),
n2 = sum(!is.na(ability_main_money)),
se2 = sd2 / sqrt(n2),
mean3 = mean(ability_main_availability, na.rm = T),
sd3 = sd(ability_main_availability, na.rm = T),
n3 = sum(!is.na(ability_main_availability)),
se3 = sd3 / sqrt(n3),
) %>%
mutate(
id = 1,
ci_low1 = mean1 - 1.96*se1,
ci_high1 = mean1 + 1.96*se1,
ci_low2 = mean2 - 1.96*se2,
ci_high2 = mean2 + 1.96*se2,
ci_low3 = mean3 - 1.96*se3,
ci_high3 = mean3 + 1.96*se3,
) %>%
pivot_longer(cols = -id) %>%
transmute(
measure = str_sub(name, 1, -2),
imped = str_sub(name, -1, -1),
value
) %>%
arrange(measure, imped) %>%
pivot_wider(names_from = measure, values_from = value) %>%
mutate(
imped = case_when(
imped == 1 ~ "Time",
imped == 2 ~ "Money",
imped == 3 ~ "Availability",
) %>% factor(levels = c("Time", "Availability", "Money"))
) %>%
ggplot() +
# geom_col(aes(x = imped, y = mean), alpha = 0.8) +
# geom_errorbar(aes(x = imped, ymin = ci_low, ymax = ci_high), width = 0.4) +
geom_linerange(aes(x = imped, ymin = ci_low, ymax = ci_high), width = 0.5) +
geom_point(aes(x = imped, y = mean), size = 4, alpha = 0.8, color = "blue") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1), breaks = scales::breaks_width(0.1)) +
theme_bw() +
labs(
x = "Ability Impediment",
y = "% Respondents with Ability Impediment",
title = "Majority ability impediments relate to lack of time",
subtitle = "Error bars represent 95% confidence intervals"
)4. Free text responses for impediments are both sparse, and
not far from our encoded responses [except for
Vaccinated people can die being a frequent free text
response we do not have]. Around 24% responses in the no-motivation fork
and 19% responses in the no-ability fork are redundant. Mis-forking
(i.e. being assigned to the wrong fork) is negligible for the motivation
fork, but at 10% for the ability fork (i.e. 10% respondents here should
be in the motivation fork).
A free text response is labeled redundant if it does not convey any information relevant to the question asked. For instance, “Can you say more?” can have redundant responses like “No”, “Yes”, “gibberish@#4”.
A free text response is labeled misforked if it mentions a
reason that does not match the fork the participant is in. For instance,
if a participant in the
no ability, yes motivation to get vaxxed fork mentions a
reason showing they have no motivation to get vaxxed, they are
considered misforked.
The charts below are created from cleaned and standardized free text responses. A couple of notes:
Motivation impediments
Question: Are there other reasons why you might not want the vaccine?
df_text %>%
count(motive_other_clean) %>%
arrange(n) %>%
drop_na(motive_other_clean) %>%
mutate(
motive_other_clean = fct_inorder(motive_other_clean),
encoded = (motive_other_clean %in% c("bad side effects", "dont trust pharma", "freedom to choose", "vaccines dont work", "religious reasons", "dont trust government", "unlikely to get sick", "no benefit", "needles/pain", "had covid already", "i can recover", "covid not dangerous")),
) %>%
filter(motive_other_clean != "no", motive_other_clean != "redundant") %>%
ggplot(aes(motive_other_clean, n, fill = encoded)) +
geom_col() +
coord_flip() +
theme_bw() +
theme(legend.position = "bottom") +
labs(
y = "Respondent count",
x = "Motivation impediment text",
fill = "Encoded in chatbot"
)Proportion of redundant responses: 24%
df_text %>%
mutate(is_redundant = (motive_other_clean == "redundant")) %>%
drop_na(motive_other_clean) %>%
tabyl(is_redundant) %>%
as_tibble() %>%
select(is_redundant, percent) %>%
kable(format = "pipe")| is_redundant | percent |
|---|---|
| FALSE | 0.757874 |
| TRUE | 0.242126 |
Ability impediments
Question: Are there other reasons it might be hard to get the vaccine?
df_text %>%
count(ability_other_clean) %>%
arrange(n) %>%
drop_na(ability_other_clean) %>%
mutate(
ability_other_clean = fct_inorder(ability_other_clean),
encoded = (ability_other_clean %in% c("no motive", "too far away", "travel costs", "getting off work", "no cash", "no vaccines left"))
) %>%
filter(ability_other_clean != "no", ability_other_clean != "redundant") %>%
ggplot(aes(ability_other_clean, n, fill = encoded)) +
geom_col() +
coord_flip() +
theme_ipsum() +
theme(legend.position = "bottom") +
labs(
y = "Respondent count",
x = "Ability impediment text",
fill = "Encoded in chatbot"
)Proportion of redundant responses: 19%
df_text %>%
mutate(is_redundant = (ability_other_clean == "redundant")) %>%
drop_na(ability_other_clean) %>%
tabyl(is_redundant) %>%
as_tibble() %>%
select(is_redundant, percent) %>%
kable(format = "pipe")| is_redundant | percent |
|---|---|
| FALSE | 0.8058252 |
| TRUE | 0.1941748 |
Proportion of misforked responses: 10%
df_text %>%
mutate(is_redundant = (ability_other_clean == "no motive")) %>%
drop_na(ability_other_clean) %>%
tabyl(is_redundant) %>%
as_tibble() %>%
select(is_misforked = is_redundant, percent) %>%
kable(format = "pipe")| is_misforked | percent |
|---|---|
| FALSE | 0.9029126 |
| TRUE | 0.0970874 |
5. A multinomial logit model with polynomial interactions among demographics, or even a random forst model, does not predict motivation or ability impediments well for unvaccinated respondents. This is also corroborated by the correlation plots.
set.seed(929292911)
# multinomial logit
df_mlm <-
df %>%
transmute(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
no_ability = case_when(
ability == "no" ~ 1,
ability == "yes" ~ 0,
),
no_motive = case_when(
motive == "no" ~ 1,
motive == "yes" ~ 0,
),
against_beliefs = if_else(motive_main == "belief", 1L, 0L) %>% replace_na(0L),
no_benefits = if_else(motive_main == "benefit", 1L, 0L) %>% replace_na(0L),
risky = if_else(motive_main == "risk", 1L, 0L) %>% replace_na(0L),
treat_nchar = nchar(best_treatment_explain) %>% replace_na(0L),
unvax = if_else(vax_status == "unvax", 1L, 0L),
impediment = case_when(
motive_main == "belief" ~ "Against beliefs",
motive_main == "benefit" ~ "No benefits",
motive_main == "risk" ~ "Risky",
TRUE ~ "No impediment"
) %>% as_factor(),
vax_status
) %>%
filter(vax_status == "unvax")
df_mlm$impediment <- relevel(df_mlm$impediment, ref = "No impediment")
trainingRows <- sample(1:nrow(df_mlm), 0.7 * nrow(df_mlm))
training <- df_mlm[trainingRows, ]
test <- df_mlm[-trainingRows, ]
mlm1 <-
multinom(impediment ~ female + education + income + religiosity + politics + location + white, data = training)
mlm2 <-
multinom(impediment ~ female * education * income * religiosity * politics * location * white, data = training)Motivation impediments
Multinomial Logit
Baseline model with 70/30 train/test split and no interactions
[impediment ~ female + education + income + religiosity + politics + location + white]
for unvaccinated respondents does a bad job of predicting true
motivation impediments. Misclassification error for this model is too
high at 68%.
predicted_class <- predict(mlm1, test)
table(`Predicted class` = predicted_class, `True class` = test$impediment)## True class
## Predicted class No impediment Risky No benefits Against beliefs
## No impediment 14 15 13 4
## Risky 14 17 19 11
## No benefits 7 5 12 3
## Against beliefs 0 0 0 0
mean(as.character(predicted_class) != as.character(test$impediment), na.rm = T)## [1] 0.6791045
A slightly more complex model with interactions among all independent variables for unvaccinated respondents does a bad job too, as misclassification error is at 67%.
predicted_class <- predict(mlm2, test)
table(`Predicted class` = predicted_class, `True class` = test$impediment)## True class
## Predicted class No impediment Risky No benefits Against beliefs
## No impediment 15 12 18 9
## Risky 9 8 16 6
## No benefits 9 10 5 3
## Against beliefs 2 7 5 0
mean(as.character(predicted_class) != as.character(test$impediment), na.rm = T)## [1] 0.7910448
Random Forest
We also try a random forest model with 70/30 train/test splitfor this task. The misclassification error still stays as high as 69%.
# impute mising values before feeding into RF
training <-
training %>%
simputation::impute_lm(politics + income + religiosity + location + education ~ female + white) %>%
drop_na()
rf_model <-
randomForest::randomForest(impediment ~ female + education + income + religiosity + politics + location + white, data = training, importance = TRUE)
predicted_class <- predict(rf_model, test)
table(`Predicted class` = predicted_class, `True class` = test$impediment)## True class
## Predicted class No impediment Risky No benefits Against beliefs
## No impediment 17 13 12 9
## Risky 14 19 22 6
## No benefits 4 4 10 3
## Against beliefs 0 1 0 0
mean(as.character(predicted_class) != as.character(test$impediment), na.rm = T)## [1] 0.6567164
Given the high misclassification error, the variable importance based on Mean decrease accuracy and Mean decrease Gini are inconsistent.
MeanDecreaseAccuracy gives a rough estimate of the loss
in prediction performance when that particular variable is omitted from
the training set.MeanDecreaseGini: GINI is a measure of node impurity.
Highest purity means that each node contains only elements of a single
class. Assessing the decrease in GINI when that feature is omitted leads
to an understanding of how important that feature is to split the data
correctly.Note: Both these measures are used to rank variables in terms of importance and their absolute values could be disregarded.
randomForest::varImpPlot(rf_model, sort = T, main = "Random Forest - Variable Importance")Correlation Plots
As a next step, the correlation plots below show correlation coefficients between continuous variables. We map binary and ordinal variables to continuous variables as follows:
female: 1 if female, 0 if maleincome: 0 if the participant is unemployed, 1 if
household income < R5,000, 2 if household income in R5,000 – R9,999,
…, 6 if household income > R100,000education: 1 if the participant’s education < high
school, 2 if education is high school, …, 6 if education is a graduate
degreereligiosity: 1 if the participant is not very
religious, 2 if somewhat religious, 3 if very religiouspolitics: 1 if the participant is conservative, 2 if
moderate, 3 if liberallocation: 1 if the participant lives in rural, 2 if
suburban, 3 if urban,white: 1 if the participant is a white or caucasian, 0
if notno_motive: 1 if the participant does not have the
motive to get vax, 0 otherwise
no_benefits: 1 if participant believes vaccine has no
benefits, 0 otherwiseagainst_beliefs:: 1 if participant believes vaccine is
against their beliefs, 0 otherwiserisky:: 1 if participant believes vaccine is risky, 0
otherwiseno_ability: 1 if the participant does not have the
ability to get vax, 0 otherwise
no_time: 1 if participant has no time to get vaxxed, 0
otherwiseno_money: 1 if participant has no money to get vaxxed,
0 otherwiseno_availability: 1 if participant faces vax
availability issues, 0 otherwisedf_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
no_ability = case_when(
ability == "no" ~ 1,
ability == "yes" ~ 0,
),
no_motive = case_when(
motive == "no" ~ 1,
motive == "yes" ~ 0,
),
against_beliefs = if_else(motive_main == "belief", 1L, 0L),
no_benefits = if_else(motive_main == "benefit", 1L, 0L),
risky = if_else(motive_main == "risk", 1L, 0L),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L)
) %>%
select("no_motive", "female", "income", "education", "religiosity", "politics", "location", "white")
mat1 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat1_p <- cor_pmat(df_mc_numeric)[1, 2:8]df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
no_ability = case_when(
ability == "no" ~ 1,
ability == "yes" ~ 0,
),
no_motive = case_when(
motive == "no" ~ 1,
motive == "yes" ~ 0,
),
against_beliefs = if_else(motive_main == "belief", 1L, 0L),
no_benefits = if_else(motive_main == "benefit", 1L, 0L),
risky = if_else(motive_main == "risk", 1L, 0L),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L)
) %>%
select("no_benefits", "female", "income", "education", "religiosity", "politics", "location", "white")
mat2 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat2_p <- cor_pmat(df_mc_numeric)[1, 2:8]df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
no_ability = case_when(
ability == "no" ~ 1,
ability == "yes" ~ 0,
),
no_motive = case_when(
motive == "no" ~ 1,
motive == "yes" ~ 0,
),
against_beliefs = if_else(motive_main == "belief", 1L, 0L),
no_benefits = if_else(motive_main == "benefit", 1L, 0L),
risky = if_else(motive_main == "risk", 1L, 0L),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L)
) %>%
select("against_beliefs", "female", "income", "education", "religiosity", "politics", "location", "white")
mat3 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat3_p <- cor_pmat(df_mc_numeric)[1, 2:8]df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
no_ability = case_when(
ability == "no" ~ 1,
ability == "yes" ~ 0,
),
no_motive = case_when(
motive == "no" ~ 1,
motive == "yes" ~ 0,
),
against_beliefs = if_else(motive_main == "belief", 1L, 0L),
no_benefits = if_else(motive_main == "benefit", 1L, 0L),
risky = if_else(motive_main == "risk", 1L, 0L),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L)
) %>%
select("risky", "female", "income", "education", "religiosity", "politics", "location", "white")
mat4 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat4_p <- cor_pmat(df_mc_numeric)[1, 2:8]final_mat <-
tibble(
mat4, mat3, mat2, mat1
) %>%
as.matrix()
final_mat_p <-
tibble(
mat4_p, mat3_p, mat2_p, mat1_p
) %>%
as.matrix()
rownames(final_mat) <- c("female", "income", "education", "religiosity", "politics", "location", "white")
colnames(final_mat) <- c("risky", "against_beliefs", "no_benefits", "no_motive")
colnames(final_mat_p) <- c("risky", "against_beliefs", "no_benefits", "no_motive")
ggcorrplot(final_mat, lab_size = 3, lab = T, tl.cex = 10)Ability impediments
Multinomial Logit
# multinomial logit
df_mlm <-
df %>%
transmute(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
no_ability = case_when(
ability == "no" ~ 1,
ability == "yes" ~ 0,
),
no_motive = case_when(
motive == "no" ~ 1,
motive == "yes" ~ 0,
),
against_beliefs = if_else(motive_main == "belief", 1L, 0L),
no_benefits = if_else(motive_main == "benefit", 1L, 0L),
risky = if_else(motive_main == "risk", 1L, 0L),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L),
impediment = case_when(
ability_main == "availability" ~ "No Availability",
ability_main == "money" ~ "No Money",
ability_main == "time" ~ "No Time",
TRUE ~ "No impediment"
) %>% as_factor(),
vax_status
) %>%
filter(vax_status == "unvax")
df_mlm$impediment <- relevel(df_mlm$impediment, ref = "No impediment")
trainingRows <- sample(1:nrow(df_mlm), 0.7 * nrow(df_mlm))
training <- df_mlm[trainingRows, ]
test <- df_mlm[-trainingRows, ]
mlm1 <-
multinom(impediment ~ female + education + income + religiosity + politics + location + white, data = training)
mlm2 <-
multinom(impediment ~ female * education * income * religiosity * politics * location * white, data = training)Baseline model with 70/30 train/test split and no interactions
[impediment ~ female + education + income + religiosity + politics + location + white]
for unvaccinated respondents classifies everyone as having no
impediment. Since we have very few respondents with ability impediments,
the model is not going to be useful despite the misclassification error
being low (19%).
predicted_class <- predict(mlm1, test)
table(`Predicted class` = predicted_class, `True class` = test$impediment)## True class
## Predicted class No impediment No Money No Time No Availability
## No impediment 107 3 19 3
## No Money 0 0 0 0
## No Time 0 0 0 0
## No Availability 0 0 0 0
mean(as.character(predicted_class) != as.character(test$impediment), na.rm = T)## [1] 0.1893939
Random Forest
We also try a random forest model with 70/30 train/test splitfor this
task. The misclassification error is around 20%, though most predictions
are No impediment.
# impute mising values before feeding into RF
training <-
training %>%
simputation::impute_lm(politics + income + religiosity + location + education ~ female + white) %>%
drop_na()
rf_model <-
randomForest::randomForest(impediment ~ female + education + income + religiosity + politics + location + white, data = training, importance = TRUE)
predicted_class <- predict(rf_model, test)
table(`Predicted class` = predicted_class, `True class` = test$impediment)## True class
## Predicted class No impediment No Money No Time No Availability
## No impediment 106 3 19 3
## No Money 0 0 0 0
## No Time 1 0 0 0
## No Availability 0 0 0 0
mean(as.character(predicted_class) != as.character(test$impediment), na.rm = T)## [1] 0.1969697
The variable importance based on Mean decrease accuracy and Mean decrease Gini are again inconsistent.
randomForest::varImpPlot(rf_model, sort = T, main = "Random Forest - Variable Importance")Correlation plots
The correlation plots for ability impediments and demographics support these results.
df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
no_ability = case_when(
ability == "no" ~ 1,
ability == "yes" ~ 0,
),
no_motive = case_when(
motive == "no" ~ 1,
motive == "yes" ~ 0,
),
no_time = if_else(ability_main == "time", 1L, 0L),
no_money = if_else(ability_main == "money", 1L, 0L),
no_availability = if_else(ability_main == "availability", 1L, 0L),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L)
) %>%
select("no_ability", "female", "income", "education", "religiosity", "politics", "location", "white")
mat5 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat5_p <- cor_pmat(df_mc_numeric)[1, 2:8]df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
no_ability = case_when(
ability == "no" ~ 1,
ability == "yes" ~ 0,
),
no_motive = case_when(
motive == "no" ~ 1,
motive == "yes" ~ 0,
),
no_time = if_else(ability_main == "time", 1L, 0L),
no_money = if_else(ability_main == "money", 1L, 0L),
no_availability = if_else(ability_main == "availability", 1L, 0L),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L)
) %>%
select("no_time", "female", "income", "education", "religiosity", "politics", "location", "white")
mat6 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat6_p <- cor_pmat(df_mc_numeric)[1, 2:8]df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
no_ability = case_when(
ability == "no" ~ 1,
ability == "yes" ~ 0,
),
no_motive = case_when(
motive == "no" ~ 1,
motive == "yes" ~ 0,
),
no_time = if_else(ability_main == "time", 1L, 0L),
no_money = if_else(ability_main == "money", 1L, 0L),
no_availability = if_else(ability_main == "availability", 1L, 0L),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L)
) %>%
select("no_money", "female", "income", "education", "religiosity", "politics", "location", "white")
mat7 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat7_p <- cor_pmat(df_mc_numeric)[1, 2:8]df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
no_ability = case_when(
ability == "no" ~ 1,
ability == "yes" ~ 0,
),
no_motive = case_when(
motive == "no" ~ 1,
motive == "yes" ~ 0,
),
no_time = if_else(ability_main == "time", 1L, 0L),
no_money = if_else(ability_main == "money", 1L, 0L),
no_availability = if_else(ability_main == "availability", 1L, 0L),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L)
) %>%
select("no_availability", "female", "income", "education", "religiosity", "politics", "location", "white")
mat8 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat8_p <- cor_pmat(df_mc_numeric)[1, 2:8]final_mat <-
tibble(
mat8, mat7, mat6, mat5
) %>%
as.matrix()
final_mat_p <-
tibble(
mat8_p, mat7_p, mat6_p, mat5_p
) %>%
as.matrix()
rownames(final_mat) <- c("female", "income", "education", "religiosity", "politics", "location", "white")
colnames(final_mat) <- c("no_availability", "no_money", "no_time", "no_ability")
colnames(final_mat_p) <- c("no_availability", "no_money", "no_time", "no_ability")
ggcorrplot(final_mat, lab_size = 3, lab = T, tl.cex = 10)1. In our February pilot version 5, no clear treatment was preferred by unvaccinated individuals, while vaccinated individuals strongly preferred family support. This trend changed with rising vaccinations in our March pilot version 6. A large proportion of unvaccinated and vaccinated respondents indicated job/school mandates as the treatment that worked for them.
Note: Job/school required treatment was added
after cleaning free text responses from pilot 5.
plot1 <-
df_v5 %>%
mutate(
best_treatment = str_to_sentence(best_treatment),
best_treatment = if_else(str_detect(best_treatment, "^Nothing"), "Nothing", best_treatment),
best_treatment = if_else(best_treatment %in% c("Family supports it", "Trusted info source", "More transparency", "Nothing", "Rewards for vaxxing", "Job/school required"), best_treatment, NA_character_),
) %>%
drop_na(best_treatment) %>%
count(vax_status, best_treatment) %>%
arrange(vax_status, -n) %>%
group_by(vax_status) %>%
mutate(perc = n/sum(n)) %>%
ungroup() %>%
mutate(best_treatment = fct_inorder(best_treatment) %>% fct_rev()) %>%
ggplot(aes(best_treatment, n)) +
geom_col() +
coord_flip() +
theme_bw() +
facet_wrap(vars(vax_status)) +
labs(x = "Best treatment", y = "# of respondents", title = "Treatments preferred to get vaxxed (Pilot 5, n = 1429)")
plot2 <-
df_v6 %>%
mutate(
best_treatment = str_to_sentence(best_treatment),
best_treatment = if_else(str_detect(best_treatment, "^Nothing"), "Nothing", best_treatment),
best_treatment = if_else(best_treatment %in% c("Family supports it", "Trusted info source", "More transparency", "Nothing", "Rewards for vaxxing", "Job/school required"), best_treatment, NA_character_),
) %>%
drop_na(best_treatment) %>%
count(vax_status, best_treatment) %>%
arrange(vax_status, -n) %>%
group_by(vax_status) %>%
mutate(perc = n/sum(n)) %>%
ungroup() %>%
mutate(best_treatment = fct_inorder(best_treatment) %>% fct_rev()) %>%
ggplot(aes(best_treatment, n)) +
geom_col() +
coord_flip() +
theme_bw() +
facet_wrap(vars(vax_status)) +
labs(x = "Best treatment", y = "# of respondents", title = "Treatments preferred to get vaxxed (Pilot 6, n = 1763)")
cowplot::plot_grid(plot1, plot2, nrow = 2)df_v5 %>%
transmute(
best_treatment = str_to_sentence(best_treatment),
best_treatment = if_else(str_detect(best_treatment, "^Nothing"), "Nothing", best_treatment),
best_treatment = if_else(best_treatment %in% c("Family supports it", "Trusted info source", "More transparency", "Nothing", "Rewards for vaxxing", "Job/school required"), best_treatment, NA_character_),
) %>%
drop_na(best_treatment) %>%
group_by(vax_status, best_treatment) %>%
summarise(
vax_mean = mean(vaxxed, na.rm = T),
vax_sd = sd(vaxxed, na.rm = T),
vax_n = sum(!is.na(vaxxed)),
vax_se = vax_sd / sqrt(vax_n)
) %>%
mutate(
confint_low = vax_mean - 1.96*vax_se,
confint_high = vax_mean + 1.96*vax_se,
)2. Free text responses for preferred treatments were dominated by responses highlighting that nothing would work. The second most frequent response was job/school mandates, which has now been incorporated as an encoded response in our chatbot.
Question: What would best help you to get vaccinated?
df_text %>%
count(best_treatment_other_clean) %>%
arrange(n) %>%
drop_na(best_treatment_other_clean) %>%
filter(best_treatment_other_clean != "redundant") %>%
mutate(
best_treatment_other_clean = fct_inorder(best_treatment_other_clean),
encoded = (best_treatment_other_clean %in% c("nothing", "trusted info source", "more transparency"))
) %>%
ggplot(aes(best_treatment_other_clean, n, fill = encoded)) +
geom_col() +
coord_flip() +
theme_ipsum() +
theme(legend.position = "bottom") +
labs(
y = "Respondent count",
x = "Treatment text",
fill = "Encoded in chatbot"
)Proportion of redundant responses: 7.3%
df_text %>%
mutate(is_redundant = (best_treatment_other_clean == "redundant")) %>%
drop_na(best_treatment_other_clean) %>%
tabyl(is_redundant) %>%
as_tibble() %>%
select(is_redundant, percent) %>%
kable(format = "pipe")| is_redundant | percent |
|---|---|
| FALSE | 0.9266667 |
| TRUE | 0.0733333 |
3. Models with demographics are not strong predictors of preferred treatments.
Multinomial logit
# multinomial logit
df_mlm <-
df %>%
transmute(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
best_treatment = str_to_lower(best_treatment),
best_treatment = if_else(str_detect(best_treatment, "^nothing"), "nothing", best_treatment),
best_treatment = if_else(best_treatment %in% c("job/school required", "family supports it", "trusted info source", "more transparency", "nothing", "rewards for vaxxing"), best_treatment, NA_character_) %>% as_factor(),
)
df_mlm$best_treatment <- relevel(df_mlm$best_treatment, ref = "nothing")
trainingRows <- sample(1:nrow(df_mlm), 0.7 * nrow(df_mlm))
training <- df_mlm[trainingRows, ]
test <- df_mlm[-trainingRows, ]
mlm1 <-
multinom(best_treatment ~ female + education + income + religiosity + politics + location + white, data = training)
mlm2 <-
multinom(best_treatment ~ female * education * income * religiosity * politics * location * white, data = training)Baseline model with 70/30 train/test split and no interactions
[best_treatment ~ female + education + income + religiosity + politics + location + white]
for all respondents doesn’t do well as we have a misclassification error
of 70%.
predicted_class <- predict(mlm1, test)
table(`Predicted class` = predicted_class, `True class` = test$best_treatment)## True class
## Predicted class nothing family supports it job/school required
## nothing 1 0 0
## family supports it 26 60 29
## job/school required 12 55 39
## rewards for vaxxing 1 3 1
## trusted info source 3 1 4
## more transparency 1 0 1
## True class
## Predicted class rewards for vaxxing trusted info source more transparency
## nothing 0 1 0
## family supports it 7 31 17
## job/school required 8 20 7
## rewards for vaxxing 0 3 1
## trusted info source 1 1 2
## more transparency 1 0 0
mean(as.character(predicted_class) != as.character(test$best_treatment), na.rm = T)## [1] 0.7002967
Baseline model with 70/30 train/test split and all possible independent variable interactions doesn’t do well as we have a misclassification error of 73%.
predicted_class <- predict(mlm2, test)
table(`Predicted class` = predicted_class, `True class` = test$best_treatment)## True class
## Predicted class nothing family supports it job/school required
## nothing 3 8 5
## family supports it 18 48 30
## job/school required 13 45 21
## rewards for vaxxing 3 1 3
## trusted info source 6 14 13
## more transparency 1 3 2
## True class
## Predicted class rewards for vaxxing trusted info source more transparency
## nothing 1 1 1
## family supports it 7 29 16
## job/school required 7 17 5
## rewards for vaxxing 0 3 1
## trusted info source 1 5 4
## more transparency 1 1 0
mean(as.character(predicted_class) != as.character(test$best_treatment), na.rm = T)## [1] 0.7715134
Random Forest
We also try a random forest model with 70/30 train/test splitfor this task. The misclassification error is around 71%.
# impute mising values before feeding into RF
training <-
training %>%
simputation::impute_lm(politics + income + religiosity + location + education ~ female + white) %>%
drop_na()
rf_model <-
randomForest::randomForest(best_treatment ~ female + education + income + religiosity + politics + location + white, data = training, importance = TRUE)
predicted_class <- predict(rf_model, test)
table(`Predicted class` = predicted_class, `True class` = test$best_treatment)## True class
## Predicted class nothing family supports it job/school required
## nothing 5 7 1
## family supports it 18 50 26
## job/school required 19 56 40
## rewards for vaxxing 0 1 1
## trusted info source 2 4 5
## more transparency 0 1 1
## True class
## Predicted class rewards for vaxxing trusted info source more transparency
## nothing 0 4 0
## family supports it 6 28 17
## job/school required 7 23 8
## rewards for vaxxing 0 0 0
## trusted info source 3 1 2
## more transparency 1 0 0
mean(as.character(predicted_class) != as.character(test$best_treatment), na.rm = T)## [1] 0.7151335
The variable importance based on Mean decrease accuracy and Mean decrease Gini are again inconsistent.
randomForest::varImpPlot(rf_model, sort = T, main = "Random Forest - Variable Importance")Correlation Plots
Correlation plots for treatments vs demographics support these results:
df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L),
best_treatment = str_to_lower(best_treatment),
best_treatment = if_else(str_detect(best_treatment, "^nothing"), "nothing", best_treatment),
best_treatment = if_else(best_treatment %in% c("job/school required", "family supports it", "trusted info source", "more transparency", "nothing", "rewards for vaxxing"), best_treatment, NA_character_),
) %>%
fastDummies::dummy_cols(select_columns = "best_treatment") %>%
select(family_supports_it = `best_treatment_family supports it`, "female", "income", "education", "religiosity", "politics", "location", "white")
mat9 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat9_p <- cor_pmat(df_mc_numeric)[1, 2:8]df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L),
best_treatment = str_to_lower(best_treatment),
best_treatment = if_else(str_detect(best_treatment, "^nothing"), "nothing", best_treatment),
best_treatment = if_else(best_treatment %in% c("job/school required", "family supports it", "trusted info source", "more transparency", "nothing", "rewards for vaxxing"), best_treatment, NA_character_)
) %>%
fastDummies::dummy_cols(select_columns = "best_treatment") %>%
select(trusted_info_source = `best_treatment_trusted info source`, "female", "income", "education", "religiosity", "politics", "location", "white")
mat10 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat10_p <- cor_pmat(df_mc_numeric)[1, 2:8]df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L),
best_treatment = str_to_lower(best_treatment),
best_treatment = if_else(str_detect(best_treatment, "^nothing"), "nothing", best_treatment),
best_treatment = if_else(best_treatment %in% c("job/school required", "family supports it", "trusted info source", "more transparency", "nothing", "rewards for vaxxing"), best_treatment, NA_character_)
) %>%
fastDummies::dummy_cols(select_columns = "best_treatment") %>%
select(job_school_required = `best_treatment_job/school required`, "female", "income", "education", "religiosity", "politics", "location", "white")
mat11 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat11_p <- cor_pmat(df_mc_numeric)[1, 2:8]df_mc_numeric <-
df %>%
mutate(
female = case_when(
gender == "female" ~ 1,
gender == "male" ~ 0,
),
income = case_when(
income == "Unemployed" ~ 0,
income == "< R5,000" ~ 1,
income == "R5,000 – R9,999" ~ 2,
income == "R10,000 – R29,999" ~ 3,
income == "R30,000 – R49,999" ~ 4,
income == "R50,000 – R99,999" ~ 5,
income == "> R100,000" ~ 6,
),
education = case_when(
education == "< high school" ~ 1,
education == "high school" ~ 2,
education == "some college" ~ 3,
education == "2-year degree" ~ 4,
education == "4-year degree" ~ 5,
education == "graduate degree" ~ 6,
),
religiosity = case_when(
religiosity == "not very religious" ~ 1,
religiosity == "somewhat religious" ~ 2,
religiosity == "very religious" ~ 3,
),
politics = case_when(
politics == "conservative" ~ 1,
politics == "moderate" ~ 2,
politics == "liberal" ~ 3,
),
location = case_when(
location == "rural" ~ 1,
location == "suburban" ~ 2,
location == "urban" ~ 3,
),
white = case_when(
ethnicity == "white or caucasian" ~ 1,
ethnicity != "white or caucasian" ~ 0
),
treat_nchar = nchar(best_treatment_explain),
unvax = if_else(vax_status == "unvax", 1L, 0L),
best_treatment = str_to_lower(best_treatment),
best_treatment = if_else(str_detect(best_treatment, "^nothing"), "nothing", best_treatment),
best_treatment = if_else(best_treatment %in% c("job/school required", "family supports it", "trusted info source", "more transparency", "nothing", "rewards for vaxxing"), best_treatment, NA_character_)
) %>%
fastDummies::dummy_cols(select_columns = "best_treatment") %>%
select(rewards_for_vaxxing = `best_treatment_rewards for vaxxing`, "female", "income", "education", "religiosity", "politics", "location", "white")
mat12 <- cor(df_mc_numeric, use = "pairwise.complete.obs")[1, 2:8]
mat12_p <- cor_pmat(df_mc_numeric)[1, 2:8]final_mat <-
tibble(
mat12, mat11, mat10, mat9
) %>%
as.matrix()
final_mat_p <-
tibble(
mat12_p, mat11_p, mat10_p, mat9_p
) %>%
as.matrix()
rownames(final_mat) <- c("female", "income", "education", "religiosity", "politics", "location", "white")
colnames(final_mat) <- c("rewards", "mandatory", "trusted_info_source", "family_support")
colnames(final_mat_p) <- c("rewards", "mandatory", "trusted_info_source", "family_support")
ggcorrplot(final_mat, lab_size = 3, lab = T, tl.cex = 10)1. 85% respondents complete the survey conditional on consent. Cost per completed survey is around $0.5 (excluding airtime). Cost per completed survey for unvaccinated participants is relatively higher at $1.27 (excluding airtime) as we have less unvaccinated respondents in the most recent pilot.
Metrics used in the table below are defined as follows:
Impressions (Total Count) = the total number of times
our ad has been viewedClickthrough (%) = #clicks / #impressionsMessages Sent (%) = #conversations / #clicksConsent Obtained (%) = #consents / #conversationsCore Survey Complete (%) = #forking section completed /
#consentsTreatment Complete (%) = #treatment section completed /
#forking section completedDemo Questions Complete (%) = #demog section completed
/ #treatment section completedFull Survey Complete (%) = #full chat completed /
#demog section completedCost per Impression = amount spent / #impressions (in
USD)Cost per Link Click = amount spent / #clicks (in
USD)Cost per Survey Complete (All participants) = amount
spent / #full chat completed (in USD)Cost per Survey Complete (Unvax) = amount spent / #full
chat completed with unvaccinated participants (in USD)Cost per Survey Complete (Unvax, Open to Treatment) =
amount spent / #full chat completed with unvaccinated and open to
treatment participants (in USD)Note: Costs in below table exclude $0.5 airtime cost per respondent completing the survey.
# overall ad cost for full pilot
df_temp <- df_full
ads_latest <- ads
df_ads_unvax_current <- df_temp %>% filter(vax_status == "unvax")
df_ads_unvax_open_current <- df_ads_unvax_current %>% filter(vax_future %in% c("maybe", "of course!"))
cost <- sum(ads_latest$Amount.spent..USD., na.rm = T)
impressions <- sum(ads_latest$Impressions, na.rm = T)
clicks <- sum(ads_latest$Link.clicks, na.rm = T)
conversations <- sum(ads_latest$Results, na.rm = T)
consents <- sum(df_temp$consent == "yes", na.rm = T)
core_complete <- sum(!is.na(df_temp$main_complete), na.rm = T)
treatment_complete <- sum(df_temp$treatment_complete == "yes", na.rm = T)
demog_complete <- sum(!is.na(df_temp$demog_complete), na.rm = T)
full_complete <- sum(df_temp$full_complete == "yes", na.rm = T)
full_complete_unvax <- sum(df_ads_unvax_current$full_complete == "yes", na.rm = T)
full_complete_unvax_open <- sum(df_ads_unvax_open_current$full_complete == "yes", na.rm = T)
dropoff_current <- data.frame(
metric = c("Impressions", "Link Clicks", "Messages Sent",
"Consent Obtained", "Core Survey Complete", "Treatment complete",
"Demographic Questions Complete", "Full Survey Complete",
"Full Survey Complete (Unvax)", "Full Survey Complete (Unvax, Open to Treatment)"),
total = c(impressions, clicks, conversations, consents,
core_complete, treatment_complete, demog_complete, full_complete,
full_complete_unvax, full_complete_unvax_open),
perc_of_prev = c("-", form_percent(clicks / impressions),
form_percent(conversations / clicks),
form_percent(consents / conversations),
form_percent(core_complete / consents),
form_percent(treatment_complete/core_complete),
form_percent(demog_complete / treatment_complete),
form_percent(full_complete / demog_complete),
form_percent(full_complete_unvax / full_complete),
form_percent(full_complete_unvax_open / full_complete_unvax)),
cost_per = c(form_cost(impressions), form_cost(clicks),
form_cost(conversations), form_cost(consents),
form_cost(core_complete), form_cost(treatment_complete),
form_cost(demog_complete), form_cost(full_complete),
form_cost(full_complete_unvax), form_cost(full_complete_unvax_open)))
dropoff_current %>%
rename(
Metric = metric,
`Total` = total,
`% of Previous Funnel` = perc_of_prev,
`$ Ad Cost per` = cost_per
) %>%
datatable(options = list(pageLength = 10, columnDefs = list(list(orderable = TRUE, targets = 0))))2. Dropoff starts early among participants not completing our survey.
For unvaccinated respondents (including those not reporting vax status):
Question-wise completion rates for unvaccinated participants not fully completing the survey:
df_full %>%
select(continue_2,continue_3,vax_status,motive,ability,unvaxeasy_main,unvaxeasy_main_other,forget_main,forget_main_other,priority_main,priority_main_other,unvaxeasy_other,unvaxeasy_other_add,vax_future,best_treatment,best_treatment_explain,best_treatment_explain,best_treatment_explain,best_treatment_explain,post_vax_measure,cv_age,ethnicity,income,education,religion,christian_type,religiosity,politics,location,comfortable,engaging,enjoyable,enjoyable_not_parts,suggestions,phone_number,full_complete) %>%
filter(is.na(full_complete)) %>%
filter(is.na(vax_status) | vax_status == "unvax") %>%
summarize_all(~ mean(!is.na(.))) %>%
mutate(id = 1) %>%
pivot_longer(cols = -id, names_to = "question", values_to = "completion_rate") %>%
arrange(-completion_rate) %>%
transmute(
completion_rate,
question = case_when(
question == "continue_2" ~ "Consent 1: Ask your opinion on vaccines",
question == "continue_3" ~ "Consent 2: Continue if you're over 18 and wish to start",
question == "vax_status" ~ "Vaccination status",
question == "motive" ~ "Motive to get vaccine",
question == "ability" ~ "Ability to get vaacine",
question == "vax_future" ~ "Get vaccine in future",
question == "best_treatment" ~ "Preferred treatment",
question == "ethnicity" ~ "Ethnicity",
question == "income" ~ "Income",
question == "education" ~ "Education",
question == "religion" ~ "Religion",
question == "religiosity" ~ "Religiosity",
question == "politics" ~ "Politics",
question == "location" ~ "Location",
question == "comfortable" ~ "Was chat comfortable?",
question == "enjoyable" ~ "Was chat enjoyable?",
TRUE ~ NA_character_
) %>% fct_inorder() %>% fct_rev()
) %>%
drop_na() %>%
ggplot(aes(question, completion_rate)) +
geom_col(alpha = 0.85) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
coord_flip() +
theme_minimal() +
labs(
x = "Question",
y = "Completion rate (%)",
# subtitle = "Completion rate by question for non-completers"
)3. Most participants reaching the end of the survey enjoyed interacting with/were comfortable with the chatbot.
df_v6 %>%
select(version, enjoyable) %>%
bind_rows(
df_full_v4b %>% filter(full_complete == "yes", version == "pilot_4b") %>% select(version, enjoyable),
df_full_v5 %>% filter(full_complete == "yes", version == "pilot_5") %>% select(version, enjoyable)
) %>%
mutate(
enjoyable = str_to_sentence(enjoyable) %>% str_squish()
) %>%
filter(enjoyable %in% c("Very enjoyable", "Moderately enjoyable", "Not enjoyable")) %>%
count(version, enjoyable) %>%
group_by(version) %>%
mutate(percent = n/sum(n)) %>%
ungroup() %>%
arrange(version, n) %>%
mutate(
enjoyable = enjoyable %>% fct_inorder() %>% fct_rev()
) %>%
#rename(ad_text = `Analysis 4 - body text`)
ggplot(aes(version, percent, fill = enjoyable), color = "black") +
geom_col(position = "stack", alpha = 0.8) +
scale_y_continuous(labels = scales::percent) +
scale_fill_brewer(palette = "Set2") +
theme_bw() +
theme(legend.position = "bottom") +
labs(
x = "Version",
y = "% respondents",
color = ""
)df %>%
select(version, enjoyable) %>%
mutate(
enjoyable = str_to_sentence(enjoyable) %>% str_squish()
) %>%
filter(enjoyable %in% c("Very enjoyable", "Moderately enjoyable", "Not enjoyable")) %>%
count(version, enjoyable) %>%
group_by(version) %>%
mutate(percent = n/sum(n)) %>%
ungroup() %>%
arrange(version, n) %>%
mutate(
enjoyable = enjoyable %>% fct_inorder() %>% fct_rev()
) %>%
ggplot(aes(version, percent, fill = enjoyable), color = "black") +
geom_col(position = "stack", alpha = 0.8) +
scale_y_continuous(labels = scales::percent) +
scale_fill_brewer(palette = "Set2") +
theme_bw() +
theme(legend.position = "bottom") +
labs(
x = "Version",
y = "% respondents",
color = ""
)df_v6 %>%
select(version, comfortable) %>%
bind_rows(
df_full_v4b %>% filter(full_complete == "yes", version == "pilot_4b") %>% select(version, comfortable),
df_full_v5 %>% filter(full_complete == "yes", version == "pilot_5") %>% select(version, comfortable)
) %>%
mutate(
comfortable = str_to_sentence(comfortable) %>% str_squish()
) %>%
filter(comfortable %in% c("Very comfortable", "Somewhat comfortable", "Not comfortable")) %>%
count(version, comfortable) %>%
group_by(version) %>%
mutate(percent = n/sum(n)) %>%
ungroup() %>%
arrange(version, n) %>%
mutate(
comfortable = comfortable %>% fct_inorder() %>% fct_rev()
) %>%
ggplot(aes(version, percent, fill = comfortable), color = "black") +
geom_col(position = "stack", alpha = 0.8) +
scale_y_continuous(labels = scales::percent) +
scale_fill_brewer(palette = "Set2") +
theme_bw() +
theme(legend.position = "bottom") +
labs(
x = "Version",
y = "% respondents",
color = ""
)For context on ads used in this pilot, we have:
Details on each ad can be found here.
1. Facebook A|B testing is not a “true” randomized A|B test as gender distribution is unbalanced across the 5 ad sets.
Even though the randomization is at the ad set level, we can see that
pilot_5_unncessary_control and
pilot_5_risky_airtime have very different gender ratios
(close to 1 and 2 respectively). Facebook essentially enables a
comparative test by dividing up the total population into five equal
samples here, then runs an algorithm to identify attractive target
populations to maximize cost efficiency on each sample.
df_v5 %>%
inner_join(ads_v5, by = "original_ref") %>%
clean_names() %>%
filter(gender != "other") %>%
compareGroups(gender ~ ad_set, data = .) %>%
createTable(show.all = T, show.p.overall = F)##
## --------Summary descriptives table by 'gender'---------
##
## _____________________________________________________________________
## [ALL] female male
## N=1379 N=839 N=540
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## ad_set:
## pilot_5_unnecessary_control 139 (10.1%) 75 (8.94%) 64 (11.9%)
## pilot_v5_inaccessible_airtime 252 (18.3%) 143 (17.0%) 109 (20.2%)
## pilot_v5_risky_airtime 429 (31.1%) 285 (34.0%) 144 (26.7%)
## pilot_v5_risky_control 115 (8.34%) 79 (9.42%) 36 (6.67%)
## pilot_v5_unnecessary_airtime 444 (32.2%) 257 (30.6%) 187 (34.6%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
2. Ads with airtime outperform our control ads in recruiting participants, even after adjusting for the quality of responses.
The table below compares two Ad body text approaches - control (share your opinion) vs (take a short survey and earn airtime) - in terms of the metrics defined in section 3.4.
ads_specific <- ads %>% filter(`Analysis 4 - body text` == "control")
df_ads <- df_full %>% filter(original_ref %in% (ads_specific %>% pull(original_ref)))
df_ads_unvax <- df_ads %>% filter(vax_status == "unvax")
df_ads_unvax_open <- df_ads_unvax %>% filter(vax_future %in% c("maybe", "of course!"))
cost <- sum(ads_specific$Amount.spent..USD., na.rm = T)
impressions <- sum(ads_specific$Impressions, na.rm = T)
clicks <- sum(ads_specific$Link.clicks, na.rm = T)
conversations <- sum(ads_specific$Results, na.rm = T)
consents <- sum(df_ads$consent == "yes", na.rm = T)
core_complete <- sum(!is.na(df_ads$main_complete), na.rm = T)
treat_complete <- sum(!is.na(df_ads$treatment_complete), na.rm = T)
demog_complete <- sum(!is.na(df_ads$demog_complete), na.rm = T)
full_complete <- sum(df_ads$full_complete == "yes", na.rm = T)
full_complete_unvax <- sum(df_ads_unvax$full_complete == "yes", na.rm = T)
full_complete_unvax_open <- sum(df_ads_unvax_open$full_complete == "yes", na.rm = T)
treat_nchar <-
df_ads %>%
transmute(
treat_nchar = nchar(best_treatment_explain)
) %>%
summarize_all(sum, na.rm = T) %>%
pull(treat_nchar)
free_text_vars <- df_ads %>% select(ends_with("_explain")) %>% colnames()
free_text_vars <- free_text_vars[!free_text_vars %in% c('best_treatment_explain')]
mean_nchar <-
df_ads %>%
transmute_at(
vars(free_text_vars),
~ nchar(.)
) %>%
summarize_all(sum, na.rm = T) %>%
mutate(
mean_nchar =
pmap_dbl(
select(., ends_with("_explain")),
~ mean(c(...), na.rm = TRUE)
)
) %>%
pull(mean_nchar) %>%
round(2)
dropoff_1 <- tibble(
metric = c(
"Impressions (Total Count)",
"Clickthrough (%)",
"Messages Sent (%)",
"Consent Obtained (%)",
"Core Survey Complete (%)",
"Treatment Complete (%)",
"Demo Questions Complete (%)",
"Full Survey Complete (%)",
"Total characters elicited per completed survey (treatment)",
"Avg characters elicited per completed survey (impediment explanations)",
"Cost per Impression",
"Cost per Link Click",
"Cost per Survey Complete (All participants)",
"Cost per Survey Complete (Unvax)",
"Cost per Survey Complete (Unvax, Open to Treatment)"
),
ad = c(
impressions,
form_percent(clicks / impressions),
form_percent(conversations / clicks),
form_percent(consents / conversations),
form_percent(core_complete / consents),
form_percent(treat_complete / core_complete),
form_percent(demog_complete / treat_complete),
form_percent(full_complete / demog_complete),
round(treat_nchar/full_complete, 2),
round(mean_nchar/full_complete, 2),
form_cost(impressions),
form_cost(clicks),
form_cost(full_complete),
form_cost(full_complete_unvax),
form_cost(full_complete_unvax_open)
)
)
colnames(dropoff_1) <- c("Metric of Interest", "Control (6)")ads_specific <- ads %>% filter(`Analysis 4 - body text` == "airtime")
df_ads <- df_full %>% filter(original_ref %in% (ads_specific %>% pull(original_ref)))
df_ads_unvax <- df_ads %>% filter(vax_status == "unvax")
df_ads_unvax_open <- df_ads_unvax %>% filter(vax_future %in% c("maybe", "of course!"))
cost <- sum(ads_specific$Amount.spent..USD., na.rm = T)
impressions <- sum(ads_specific$Impressions, na.rm = T)
clicks <- sum(ads_specific$Link.clicks, na.rm = T)
conversations <- sum(ads_specific$Results, na.rm = T)
consents <- sum(df_ads$consent == "yes", na.rm = T)
core_complete <- sum(!is.na(df_ads$main_complete), na.rm = T)
treat_complete <- sum(!is.na(df_ads$treatment_complete), na.rm = T)
demog_complete <- sum(!is.na(df_ads$demog_complete), na.rm = T)
full_complete <- sum(df_ads$full_complete == "yes", na.rm = T)
full_complete_unvax <- sum(df_ads_unvax$full_complete == "yes", na.rm = T)
full_complete_unvax_open <- sum(df_ads_unvax_open$full_complete == "yes", na.rm = T)
treat_nchar <-
df_ads %>%
transmute(
treat_nchar = nchar(best_treatment_explain)
) %>%
summarize_all(sum, na.rm = T) %>%
pull(treat_nchar)
free_text_vars <- df_ads %>% select(ends_with("_explain")) %>% colnames()
free_text_vars <- free_text_vars[!free_text_vars %in% c('best_treatment_explain')]
mean_nchar <-
df_ads %>%
transmute_at(
vars(free_text_vars),
~ nchar(.)
) %>%
summarize_all(sum, na.rm = T) %>%
mutate(
mean_nchar =
pmap_dbl(
select(., ends_with("_explain")),
~ mean(c(...), na.rm = TRUE)
)
) %>%
pull(mean_nchar) %>%
round(2)
dropoff_2 <- tibble(
metric = c(
"Impressions (Total Count)",
"Clickthrough (%)",
"Messages Sent (%)",
"Consent Obtained (%)",
"Core Survey Complete (%)",
"Treatment Complete (%)",
"Demo Questions Complete (%)",
"Full Survey Complete (%)",
"Total characters elicited per completed survey (treatment)",
"Avg characters elicited per completed survey (impediment explanations)",
"Cost per Impression",
"Cost per Link Click",
"Cost per Survey Complete (All participants)",
"Cost per Survey Complete (Unvax)",
"Cost per Survey Complete (Unvax, Open to Treatment)"
),
ad = c(
impressions,
form_percent(clicks / impressions),
form_percent(conversations / clicks),
form_percent(consents / conversations),
form_percent(core_complete / consents),
form_percent(treat_complete / core_complete),
form_percent(demog_complete / treat_complete),
form_percent(full_complete / demog_complete),
round(treat_nchar/full_complete, 2),
round(mean_nchar/full_complete, 2),
form_cost(impressions),
form_cost(clicks),
form_cost(full_complete),
form_cost(full_complete_unvax),
form_cost(full_complete_unvax_open)
)
)
colnames(dropoff_2) <- c("Metric of Interest", "Airtime (9)")## MERGE dfs ##
dropoff_1 %>%
full_join(dropoff_2, by = "Metric of Interest") %>%
datatable(options = list(pageLength = 20, columnDefs = list(list(orderable = TRUE, targets = 0))))