# full data
# Read the cleaned CSV at pilot8/data/full_df_clean.csv, standardise the
# column names, and tidy the text fields that later steps compare against.
df_full_v8 <-
  read_csv(here("pilot8/data/full_df_clean.csv")) %>%
  clean_names() %>%
  # Strip embedded newlines and tabs from every character column.
  mutate(across(where(is.character), ~ str_remove_all(.x, "[\n\t]"))) %>%
  mutate(
    # Any answer containing "yes" becomes "yes", everything else "no"
    # (missing answers stay NA).
    motive = if_else(str_detect(motive, "yes"), "yes", "no"),
    # Any answer mentioning "risk" is collapsed to the single label "risk".
    motive_main = if_else(str_detect(motive_main, "risk"), "risk", motive_main),
    # Drop full stops, then sentence-case the label.
    best_treatment = str_to_sentence(str_remove_all(best_treatment, "\\.")),
    # Recode "New trusted info" to "More safety evidence".
    best_treatment = if_else(
      best_treatment == "New trusted info",
      "More safety evidence",
      best_treatment
    )
  ) %>%
  remove_empty("rows")
# Restrict the full data to rows marked "complete" that also have a
# non-missing vax_status.
df <- df_full_v8 %>%
  filter(full_complete == "complete", !is.na(vax_status))
# Convert a logical condition into an integer indicator:
# 1L where `condition` is TRUE, 0L where it is FALSE or NA.
to_flag <- function(condition) {
  as.integer(!is.na(condition) & condition)
}

# Feature table built from `df`: heuristic columns, survey-derived indicator
# flags, demographics, and the sentiment of the `opinion_conversation` text.
df_features <-
  df %>%
  bind_cols(
    # `ave_sentiment` from sentiment_by() for each `opinion_conversation`
    # answer, one row per respondent in the same order as `df`.
    df %>%
      pull(opinion_conversation) %>%
      get_sentences() %>%
      sentiment_by() %>%
      transmute(opinion_conv_sentiment = ave_sentiment)
  ) %>%
  mutate(
    # 0 when the answer contains "No"/"no" or is missing, 1 otherwise.
    covid_already = to_flag(!str_detect(covid_already, "No|no")),
    # Stays NA when `motive` is missing (no NA replacement here).
    no_motive = case_when(
      motive == "yes" ~ 0L,
      motive == "no" ~ 1L
    ),
    motive_elaboration = motive_nchar,
    # 1 for any non-missing answer other than "easy"; missing counts as 0.
    no_ability = to_flag(ability != "easy"),
    ability_elaboration = replace_na(ability_nchar, 0),
    against_beliefs = to_flag(motive_main == "beliefs"),
    no_benefits = to_flag(motive_main == "benefit"),
    risky = to_flag(motive_main == "risk"),
    no_time = to_flag(ability_main == "time"),
    no_money = to_flag(ability_main == "money"),
    no_availability = to_flag(ability_main == "availability"),
    # risk
    bad_side_effects = to_flag(str_detect(risk_main, "bad side effects")),
    lack_of_testing = to_flag(str_detect(risk_main, "not enough testing")),
    not_trust_pharma = to_flag(
      str_detect(risk_main, "not trust phar") |
        str_detect(belief_main, "not trust phar")
    ),
    # benefit
    covid_not_dangerous = to_flag(str_detect(benefit_main, "covid not dangerous")),
    # had_covid_before = to_flag(str_detect(benefit_main, "had covid before")),
    vaccines_dont_work = to_flag(str_detect(benefit_main, "vaccines don't work")),
    # belief
    freedom_to_choose = to_flag(str_detect(belief_main, "freedom to choose")),
    religious_reasons = to_flag(str_detect(belief_main, "religious reasons")),
    # time
    no_time_off_work = to_flag(str_detect(time_main, "hard to get off work")),
    no_time_to_research = to_flag(str_detect(time_main, "no time to research")),
    no_childcare = to_flag(str_detect(time_main, "no childcare")),
    # money
    no_cash = to_flag(str_detect(money_main, "no cash")),
    no_insurance = to_flag(str_detect(money_main, "no insurance")),
    travel_costs = to_flag(str_detect(money_main, "travel costs")),
    # availability
    no_vax_left = to_flag(str_detect(availability_main, "no vaccines left")),
    too_far = to_flag(str_detect(availability_main, "too far away")),
    # Case-insensitive match on "very"; `info_confidence` itself is not kept.
    info_confidence_high = to_flag(str_detect(str_to_lower(info_confidence), "very")),
    want_link = to_flag(str_detect(want_link, "Sure")),
    want_answer = to_flag(str_detect(want_answer, "Sure")),
    self_reflection = to_flag(self_reflection == "A lot!"),
    # demographics (`age` and `post_want_vax` are carried through unchanged
    # by the select() below)
    education = education_num,
    religiosity = religiosity_num,
    location = location_num,
    black_or_african = to_flag(ethnicity == "black or african"),
    vaccinated = vax_status_num,
    # Country dummies are kept as doubles, matching the original column type.
    nigeria = as.numeric(to_flag(country_answer == "nigeria")),
    kenya = as.numeric(to_flag(country_answer == "kenya")),
    ghana = as.numeric(to_flag(country_answer == "ghana")),
    south_africa = as.numeric(to_flag(country_answer == "south africa")),
    opinion_conv_sentiment = replace_na(opinion_conv_sentiment, 0)
  ) %>%
  select(
    starts_with(c("impediments_hr_", "info_source_hr_", "best_treat_", "opinion_hr_")),
    covid_already, no_motive, motive_elaboration, no_ability,
    ability_elaboration, against_beliefs, no_benefits, risky,
    no_time, no_money, no_availability,
    bad_side_effects, lack_of_testing, not_trust_pharma,
    covid_not_dangerous, vaccines_dont_work, freedom_to_choose,
    religious_reasons, no_time_off_work, no_time_to_research,
    no_childcare, no_cash, no_insurance, travel_costs, no_vax_left, too_far,
    info_confidence_high, want_link, want_answer, self_reflection,
    age, education, religiosity, location, black_or_african, vaccinated,
    nigeria, kenya, ghana, south_africa,
    best_treatment, opinion_conv_sentiment, post_want_vax
  ) %>%
  # Place the info-source heuristic columns directly before the
  # info_confidence_high flag.
  relocate(starts_with("info_source"), .before = info_confidence_high)
# Feature rows whose `vaccinated` value equals 0.
df_features_unvax <- filter(df_features, vaccinated == 0)
# Completed responses whose vax_status_num equals 0.
df_unvax <-
  df %>%
  filter(vax_status_num == 0)

# Impediment heuristics ----
# Respondents considered: those with a non-missing motive_reason,
# ability_reason, motive_other, or ability_other.

# Percent of those respondents whose impediments_hr_* columns sum to more
# than zero, formatted as a string such as "78%".
df_unvax %>%
  filter(
    !is.na(motive_reason) | !is.na(ability_reason) |
      !is.na(motive_other) | !is.na(ability_other)
  ) %>%
  select(starts_with("impediments_hr_")) %>%
  # Row total across the heuristic columns, reduced to a 1/0 indicator.
  transmute(classified = if_else(rowSums(.) > 0, 1, 0)) %>%
  count(classified) %>%
  mutate(percent_classified = paste0(round(n / sum(n) * 100), "%")) %>%
  filter(classified == 1) %>%
  pull(percent_classified) ## [1] "78%"
# Number of impediments_hr_* categories per respondent (row sum of the
# columns), tabulated with the share of observations at each count.
# Respondents considered: non-missing motive_reason, ability_reason,
# motive_other, or ability_other.
df_unvax %>%
  filter(
    !is.na(motive_reason) | !is.na(ability_reason) |
      !is.na(motive_other) | !is.na(ability_other)
  ) %>%
  select(starts_with("impediments_hr_")) %>%
  mutate(number_of_categories = rowSums(.)) %>%
  count(number_of_categories, name = "number_of_observations") %>%
  mutate(
    percent_of_observations = paste0(
      round(number_of_observations / sum(number_of_observations) * 100),
      "%"
    )
  )

# Column totals of each impediments_hr_* heuristic for the same respondents,
# with the prefix stripped from the names, largest first.
df_unvax %>%
  filter(
    !is.na(motive_reason) | !is.na(ability_reason) |
      !is.na(motive_other) | !is.na(ability_other)
  ) %>%
  select(starts_with("impediments_hr_")) %>%
  summarise(across(everything(), sum)) %>%
  rename_with(~ str_remove(.x, "impediments_hr_")) %>%
  pivot_longer(everything(), names_to = "Heuristic", values_to = "Count") %>%
  arrange(desc(Count))

# Pairwise correlations between the impediment heuristic columns of
# df_features_unvax, drawn as a lower-triangle correlation plot.
df_features_unvax %>%
  select(starts_with("impediments_hr_")) %>%
  rename_with(~ str_remove(.x, "impediments_hr_")) %>%
  cor(use = "pairwise.complete.obs") %>%
  ggcorrplot(type = "lower", lab = TRUE, lab_size = 12 / .pt, tl.cex = 10) +
  labs(y = "", x = "", title = "Correlation Matrix: \nImpediment Heuristics") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Info source heuristics ----
# Respondents considered: those with a non-missing info_source.

# Percent of those respondents whose info_source_hr_* columns sum to more
# than zero, formatted as a string such as "81%".
df_unvax %>%
  filter(!is.na(info_source)) %>%
  select(starts_with("info_source_hr_")) %>%
  # Row total across the heuristic columns, reduced to a 1/0 indicator.
  transmute(classified = if_else(rowSums(.) > 0, 1, 0)) %>%
  count(classified) %>%
  mutate(percent_classified = paste0(round(n / sum(n) * 100), "%")) %>%
  filter(classified == 1) %>%
  pull(percent_classified) ## [1] "81%"
# Number of info_source_hr_* categories per respondent (row sum of the
# columns), tabulated with the share of observations at each count.
# Respondents considered: non-missing info_source.
df_unvax %>%
  filter(!is.na(info_source)) %>%
  select(starts_with("info_source_hr_")) %>%
  mutate(number_of_categories = rowSums(.)) %>%
  count(number_of_categories, name = "number_of_observations") %>%
  mutate(
    percent_of_observations = paste0(
      round(number_of_observations / sum(number_of_observations) * 100),
      "%"
    )
  )

# Column totals of each info_source_hr_* heuristic for the same respondents,
# with the prefix stripped from the names, largest first.
df_unvax %>%
  filter(!is.na(info_source)) %>%
  select(starts_with("info_source_hr_")) %>%
  summarise(across(everything(), sum)) %>%
  rename_with(~ str_remove(.x, "info_source_hr_")) %>%
  pivot_longer(everything(), names_to = "Heuristic", values_to = "Count") %>%
  arrange(desc(Count))

# Pairwise correlations between the info source heuristic columns of
# df_features_unvax, drawn as a lower-triangle correlation plot.
df_features_unvax %>%
  select(starts_with("info_source_hr_")) %>%
  rename_with(~ str_remove(.x, "info_source_hr_")) %>%
  cor(use = "pairwise.complete.obs") %>%
  ggcorrplot(type = "lower", lab = TRUE, lab_size = 12 / .pt, tl.cex = 10) +
  labs(y = "", x = "", title = "Correlation Matrix: \nInfo Source Heuristics") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Best treatment proposal heuristics ----
# Respondents considered: those with a non-missing best_treatment_proposal.

# Percent of those respondents whose best_treat_* columns sum to more than
# zero, formatted as a string such as "65%".
df_unvax %>%
  filter(!is.na(best_treatment_proposal)) %>%
  select(starts_with("best_treat_")) %>%
  # Row total across the heuristic columns, reduced to a 1/0 indicator.
  transmute(classified = if_else(rowSums(.) > 0, 1, 0)) %>%
  count(classified) %>%
  mutate(percent_classified = paste0(round(n / sum(n) * 100), "%")) %>%
  filter(classified == 1) %>%
  pull(percent_classified) ## [1] "65%"
# Number of best_treat_* categories per respondent (row sum of the columns),
# tabulated with the share of observations at each count.
# Respondents considered: non-missing best_treatment_proposal.
df_unvax %>%
  filter(!is.na(best_treatment_proposal)) %>%
  select(starts_with("best_treat_")) %>%
  mutate(number_of_categories = rowSums(.)) %>%
  count(number_of_categories, name = "number_of_observations") %>%
  mutate(
    percent_of_observations = paste0(
      round(number_of_observations / sum(number_of_observations) * 100),
      "%"
    )
  )

# Column totals of each best_treat_* heuristic for the same respondents,
# with the prefix stripped from the names, largest first.
df_unvax %>%
  filter(!is.na(best_treatment_proposal)) %>%
  select(starts_with("best_treat_")) %>%
  summarise(across(everything(), sum)) %>%
  rename_with(~ str_remove(.x, "best_treat_")) %>%
  pivot_longer(everything(), names_to = "Heuristic", values_to = "Count") %>%
  arrange(desc(Count))

# Pairwise correlations between the best treatment heuristic columns of
# df_features_unvax, drawn as a lower-triangle correlation plot.
df_features_unvax %>%
  select(starts_with("best_treat_")) %>%
  rename_with(~ str_remove(.x, "best_treat_")) %>%
  cor(use = "pairwise.complete.obs") %>%
  ggcorrplot(type = "lower", lab = TRUE, lab_size = 12 / .pt, tl.cex = 10) +
  labs(y = "", x = "", title = "Correlation Matrix: \nBest Treatment Proposal Heuristics") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Opinion heuristics ----
# Respondents considered: those with a non-missing opinion_friend_family or
# opinion_conversation.

# Percent of those respondents whose opinion_hr_* columns sum to more than
# zero, formatted as a string such as "70%".
df_unvax %>%
  filter(!is.na(opinion_friend_family) | !is.na(opinion_conversation)) %>%
  select(starts_with("opinion_hr_")) %>%
  # Row total across the heuristic columns, reduced to a 1/0 indicator.
  transmute(classified = if_else(rowSums(.) > 0, 1, 0)) %>%
  count(classified) %>%
  mutate(percent_classified = paste0(round(n / sum(n) * 100), "%")) %>%
  filter(classified == 1) %>%
  pull(percent_classified) ## [1] "70%"
# Number of opinion_hr_* categories per respondent (row sum of the columns),
# tabulated with the share of observations at each count.
# Respondents considered: non-missing opinion_friend_family or
# opinion_conversation.
df_unvax %>%
  filter(!is.na(opinion_friend_family) | !is.na(opinion_conversation)) %>%
  select(starts_with("opinion_hr_")) %>%
  mutate(number_of_categories = rowSums(.)) %>%
  count(number_of_categories, name = "number_of_observations") %>%
  mutate(
    percent_of_observations = paste0(
      round(number_of_observations / sum(number_of_observations) * 100),
      "%"
    )
  )

# Column totals of each opinion_hr_* heuristic for the same respondents,
# with the prefix stripped from the names, largest first.
df_unvax %>%
  filter(!is.na(opinion_friend_family) | !is.na(opinion_conversation)) %>%
  select(starts_with("opinion_hr_")) %>%
  summarise(across(everything(), sum)) %>%
  rename_with(~ str_remove(.x, "opinion_hr_")) %>%
  pivot_longer(everything(), names_to = "Heuristic", values_to = "Count") %>%
  arrange(desc(Count))

# Pairwise correlations between the opinion heuristic columns of
# df_features_unvax, drawn as a lower-triangle correlation plot.
df_features_unvax %>%
  select(starts_with("opinion_hr_")) %>%
  rename_with(~ str_remove(.x, "opinion_hr_")) %>%
  cor(use = "pairwise.complete.obs") %>%
  ggcorrplot(type = "lower", lab = TRUE, lab_size = 12 / .pt, tl.cex = 10) +
  labs(y = "", x = "", title = "Correlation Matrix: \nOpinion Heuristics") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))