1 Overview

This user segmentation analysis is based on the 7318 respondents who completed our survey in pilot wave 8 (August 2022) and on 81 features (both multiple-choice and free-text). The related GitHub issue is here.

Analysis outline

Summary statistics
- Split by motive/ability impediment
- Split by treatment

# Load packages
# All packages are loaded in a single pacman::p_load() call.
# Note: `tm` appears twice in the list below.
pacman::p_load(DT, estimatr, kableExtra, readr, reshape2, tidyverse, xtable, dataMaid, ggcorrplot, ggmap, rpart, rpart.plot, pollster, wordcloud, tm, RColorBrewer, hrbrthemes, janitor, purrr, gridExtra, cowplot, rcompanion, nnet, texreg, compareGroups, factoextra, cluster, fastDummies, simputation, sentimentr, politeness, textir, glmnet, gamlr, tm, topicmodels, ldatuning, lda, SnowballC, olsrr, here)

# Fix the random-number seed so any stochastic step run after this point is reproducible.
set.seed(94305)

# Read the current chatfuel data (full data set, before any filtering) and
# harmonise the few columns that later steps rely on.
df_full_v8 <-
  here("pilot8/data/full_df_clean.csv") %>%
  read_csv() %>%
  clean_names() %>%
  # strip embedded newlines and tabs from every character column
  mutate(across(where(is.character), ~ str_remove_all(.x, "[\n\t]"))) %>%
  mutate(
    # any non-missing answer containing "yes" becomes "yes", all others "no"
    motive = if_else(str_detect(motive, "yes"), "yes", "no"),
    # collapse every answer mentioning "risk" into the single level "risk"
    motive_main = if_else(str_detect(motive_main, "risk"), "risk", motive_main),
    # drop periods, then sentence-case the treatment label
    best_treatment = str_to_sentence(str_remove_all(best_treatment, "\\.")),
    # relabel "New trusted info" as "More safety evidence"
    best_treatment = if_else(
      best_treatment == "New trusted info",
      "More safety evidence",
      best_treatment
    )
  ) %>%
  # discard rows in which every column is empty
  remove_empty("rows")

# Analysis sample: respondents flagged as "complete" who have a non-missing
# vaccination status.
df <- drop_na(
  filter(df_full_v8, full_complete == "complete"),
  vax_status
)

# Disabled step, kept for reference: recode free-text ("Text answer") treatments
# into the fixed treatment categories. To re-enable, pipe `df` into this mutate().
# Note that the "safe|info" rule was additionally commented out within this block.
#
# df %>%
#   mutate(
#     best_treatment = case_when(
#       best_treatment == "Text answer" & str_detect(treatment_text, "easier access to vaccine") ~ "Convenient to vaccinate",
#       best_treatment == "Text answer" & str_detect(treatment_text, "nothing") ~ "Nothing",
#       best_treatment == "Text answer" & str_detect(treatment_text, "family|encourag") ~ "Family/friend endorses it",
#       # best_treatment == "Text answer" & str_detect(treatment_text, "safe|info") ~ "More safety evidence",
#       best_treatment == "Text answer" & str_detect(treatment_text, "mandate") ~ "Job/school required it",
#       best_treatment == "Text answer" & str_detect(treatment_text, "rewards") ~ "Rewards for vaccinating",
#       best_treatment == "Text answer" & str_detect(treatment_text, "time") ~ "Time off work",
#       TRUE ~ best_treatment,
#     ),
#   )

2 Summary Statistics

We start by cleaning the chatbot features that are used in our models. Below are summary statistics for the cleaned features fed into the models.

# clean up demographic variables

# Assign each free-text answer to a single theme label.
#
# Rules are tried top to bottom and the first matching rule wins, so the order
# of the rules below is part of the behaviour. Answers that match no rule
# (including missing answers) are returned as NA.
#
# The same rule set is applied to all four free-text columns
# (opinion_friend_family, opinion_conversation, motive_other, ability_other),
# so it lives here once instead of being copied for each column.
#
# Two patterns use negative lookaheads, `(?![ive])` and `(?![ly])`, where a
# negated character class (`[^ive]`, `[^ly]`) was used before. A negated class
# has to consume one character, so an answer ending right after the keyword
# (e.g. "covid is not real") could never match. The lookahead rejects the same
# following characters but also matches at the end of the answer.
#
# @param text Character vector of free-text answers. The patterns are written
#   in lower case; presumably the text is lower-cased upstream -- TODO confirm.
# @return Character vector of theme labels, the same length as `text`.
classify_free_text <- function(text) {
  case_when(
    str_detect(text, "(((negative|sides?) ?-?)?(a|e)fff?ects?(?![ive])|ma(k|d)(e|es|ing) (me|people|them|you) sick|g(e|o)t(ting)? sick|more sick|sick effect)|(after|side|the) effect|react|bad effect|aftermath|vomit|ache|cause health problems|dizzy|negative effect|sick after|clot|fever|fatigue|health complication afterward|weakens my body|getting ill|collapse|falling down|dizziness") ~ "side_effects",

    str_detect(text, "needles?|injections?") ~ "needles_injection",

    str_detect(text, "not good for my body|safe|harm|dangerous|dangers|unhealthy|bad for my health") ~ "unsafe",

    str_detect(text, "lab rats|paralyzed|illuminati|rumurs|infertil|misinformed|mis ?information|ro?umou?rs?|myths?|theories|conspiracy|scam|zombie|controvers|romours|robot|political|politics|criticism|propaganda|gossip|online|misleading info|false info|misconceptions?|(fake )?news|article|magnetic|hoax|fake") ~ "misinformation",

    str_detect(text, "scar(e|y)|fear|afraid") ~ "scared",

    str_detect(text, "((don\'?t|didn'?t|did not|do not|no) (really )?trust|trusted|distrust|doubt|s(k|c)eptical)|not trusting it|untrustworthy|the reviews") ~ "trust",

    str_detect(text, "preg3|am expectant|breastfeeding|pregnan|lactating|newborn|infant|nursing") ~ "pregnancy_nursing",

    str_detect(text, "pain|hurt") ~ "pain",

    str_detect(text, "demonic|religio|church|god|christian|antichrist|666") ~ "religion",

    str_detect(text, "die|death|dead|kill|dying") ~ "death",

    str_detect(text, "soceity judgment|sibling|family|friends?|parents?|brother|sister|uncle|aunt|community|cousin") ~ "family_friends",

    str_detect(text, "risk") ~ "risk",

    str_detect(text, "more research|enough proof|i did see enough proof|no sure prove|unanswered questions|limited data|not informed|knowledge|information|didn't know|i don't (really )?know") ~ "more_information",

    str_detect(text, "government") ~ "government",

    str_detect(text, "benefit") ~ "no_benefit",

    str_detect(text, "effective|no proof it works") ~ "ineffective",

    str_detect(text, "i don'?t have reason|^not really$|no reason|nothing|not? interest|don't feel like|^none$") ~ "no_reason",

    str_detect(text, "not real(?![ly])|no covid|exist|is real") ~ "covid_not_real",

    str_detect(text, "did not think it'?s important|not common|didn'?t think it was necessary|is gone|few cases|less prevalent|almost over|post\\-? ?covid|isn't anything serious|not important|already controlled|i don't find it necessary|isn't as prevalent|covid is gone|i don't really hear of the covid|rates have decreased|spread.*is low|not serious|not that important|no need|not help|not? use|no more|no longer|is over|lowered|need |finished|necessity|not necessary|i don't think it'?s necessary|i don'?t see the point") ~ "no_need",

    str_detect(text, "listened to people|negative reviews|people talk|talking negative things|what people say|(bad|negative) report|negative things from social media|condemning|opinion|bad reviews|hearing negative things|people say|negativity|heare?d|hearsay|stories|talks|discussion|speculation|saying|feedback") ~ "heard_hearsay",

    str_detect(text, "belie(f|ve)") ~ "beliefs",

    str_detect(text, "never met a victim|i haven't seen someone|never saw|never seen|no one (has|from)|i have never someone") ~ "never_saw_covid",

    str_detect(text, "my immune is strong|okay with my health|am good/fine|am technically okay|my immunity system|(i'?m|am) not sick|good health|i feel (healthy|fine)|i feel (very )?ok(ay)?|healthy? is good|am healthy|healthy person|high immunity|healthy lifestyle|am just healthy|fine healthy|still healthy|i was healthy|i am just healthy|always been healthy|eating healthy|^(?!.*(weak|affect my)).*immune system|strong immunity") ~ "healthy",

    str_detect(text, "asthmatic|weak immune system|health issue|it's not good for human health|not good for my health|no good for health|allergies|health issues|body weakness|chronic illness|high blood pressure") ~ "health_issues",

    str_detect(text, "distant visit|near the place|nearby|nearest place|far |not available in my area") ~ "distance",

    str_detect(text, "office|not living close|at work|requirement at work|work related|job") ~ "work",

    str_detect(text, "travel") ~ "travel",

    str_detect(text, "nowhere near contacting|am not infected|i had not gotten corona|don'?t have.*(virus|covid|symptoms|it$)") ~ "dont_have_covid",

    # one-word replies, links, "personal reasons", and answers shorter than 3 characters
    str_detect(text, "^(yes|nice|noo?|ok(ay)?|bad|yes sure|nope?|unsure|pardon|lol)$|https\\:\\/\\/|personal reasons") | nchar(text) <3 ~ "nonsensical",

    str_detect(text, "availab") ~ "no_availability",

    str_detect(text, "expensive|money|lack of money|cost") ~ "need_money",

    TRUE ~ NA_character_
  )
}

# Build the model feature table: one row per row of `df`, holding binary flags,
# numeric demographics, a sentiment score, and dummy-coded categorical themes.
df_features <-
  df %>% 
  # average sentiment of each respondent's `opinion_conversation` answer
  bind_cols(
    df %>%
      pull(opinion_conversation) %>%
      get_sentences() %>%
      sentiment_by() %>% 
      transmute(opinion_conv_sentiment = ave_sentiment)
  ) %>%
  mutate(
    
    # Binary flags below are coded 1L/0L; a missing answer is counted as 0.
    covid_already = if_else(str_detect(covid_already, "No|no"), 0L, 1L) %>% replace_na(0),
    no_motive = case_when(
      motive == "yes" ~ 0L,
      motive == "no" ~ 1L,
    ),
    motive_elaboration = motive_nchar,
    # post_want_vax,
    no_ability = case_when(
      ability == "easy" ~ 0L,
      ability != "easy" & !is.na(ability) ~ 1L,
    ) %>% replace_na(0),
    ability_elaboration = ability_nchar %>% replace_na(0),
    
    # main motive impediment
    against_beliefs = if_else(motive_main == "beliefs", 1L, 0L) %>% replace_na(0),
    no_benefits = if_else(motive_main == "benefit", 1L, 0L) %>% replace_na(0),
    risky = if_else(motive_main == "risk", 1L, 0L) %>% replace_na(0),
    
    # main ability impediment
    no_time = if_else(ability_main == "time", 1L, 0L) %>% replace_na(0),
    no_money = if_else(ability_main == "money", 1L, 0L) %>% replace_na(0),
    no_availability = if_else(ability_main == "availability", 1L, 0L) %>% replace_na(0),
    
    # risk
    bad_side_effects = if_else(str_detect(risk_main, "bad side effects"), 1L, 0L) %>% replace_na(0),
    lack_of_testing = if_else(str_detect(risk_main, "not enough testing"), 1L, 0L) %>% replace_na(0),
    not_trust_pharma = if_else(str_detect(risk_main, "not trust phar") | str_detect(belief_main, "not trust phar"), 1L, 0L) %>% replace_na(0),
    # benefit
    covid_not_dangerous = if_else(str_detect(benefit_main, "covid not dangerous"), 1L, 0L) %>% replace_na(0),
    # had_covid_before = if_else(str_detect(benefit_main, "had covid before"), 1L, 0L) %>% replace_na(0),
    vaccines_dont_work = if_else(str_detect(benefit_main, "vaccines don't work"), 1L, 0L) %>% replace_na(0),
    # belief
    freedom_to_choose = if_else(str_detect(belief_main, "freedom to choose"), 1L, 0L) %>% replace_na(0),
    religious_reasons = if_else(str_detect(belief_main, "religious reasons"), 1L, 0L) %>% replace_na(0),
    
    # time
    no_time_off_work = if_else(str_detect(time_main, "hard to get off work"), 1L, 0L) %>% replace_na(0),
    no_time_to_research = if_else(str_detect(time_main, "no time to research"), 1L, 0L) %>% replace_na(0),
    no_childcare = if_else(str_detect(time_main, "no childcare"), 1L, 0L) %>% replace_na(0),
    # money
    no_cash = if_else(str_detect(money_main, "no cash"), 1L, 0L) %>% replace_na(0),
    no_insurance = if_else(str_detect(money_main, "no insurance"), 1L, 0L) %>% replace_na(0),
    travel_costs = if_else(str_detect(money_main, "travel costs"), 1L, 0L) %>% replace_na(0),
    # availability
    no_vax_left = if_else(str_detect(availability_main, "no vaccines left"), 1L, 0L) %>% replace_na(0),
    too_far = if_else(str_detect(availability_main, "too far away"), 1L, 0L) %>% replace_na(0),
    
    # info variables
    info_source = str_squish(info_source) %>% str_to_lower(),
    # First matching rule wins; sources matching no rule become NA.
    # Answers containing "media" without a social-media keyword fall under "television".
    info_source = case_when(
      str_detect(info_source, "social|facebook|twitter|whatsapp|fb|insta|tiktok|youtube") ~ "social media",
      str_detect(info_source, "online|google|internet|web") ~ "internet",
      str_detect(info_source, "media|tv|television") ~ "television",
      str_detect(info_source, "radio") ~ "radio",
      str_detect(info_source, "news") ~ "news",
      str_detect(info_source, "friend|family|work|elder") ~ "friends & family",
      str_detect(info_source, "health|hospital|clinic") ~ "hospital",
      str_detect(info_source, "school|teach|educ") ~ "school",
    ),
    info_confidence = str_to_lower(info_confidence),
    info_confidence_high = if_else(str_detect(info_confidence, "very"), 1L, 0L) %>% replace_na(0),
    want_link = if_else(str_detect(want_link, "Sure"), 1L, 0L) %>% replace_na(0),
    want_answer = if_else(str_detect(want_answer, "Sure"), 1L, 0L) %>% replace_na(0),
    self_reflection = if_else(self_reflection == "A lot!", 1L, 0L) %>% replace_na(0),
    
    # demographics
    age,
    education = education_num,
    religiosity = religiosity_num,
    location = location_num,
    black_or_african = if_else(ethnicity == "black or african", 1L, 0L) %>% replace_na(0),
    vaccinated = vax_status_num,
    nigeria = if_else(country_answer == "nigeria", 1, 0) %>% replace_na(0),
    kenya = if_else(country_answer == "kenya", 1, 0) %>% replace_na(0),
    ghana = if_else(country_answer == "ghana", 1, 0) %>% replace_na(0),
    south_africa = if_else(country_answer == "south africa", 1, 0) %>% replace_na(0),
    
    best_treatment,
    
    ## cleaned free text features ##
    
    opinion_conv_sentiment = opinion_conv_sentiment %>% replace_na(0),
    
    # Theme of each free-text answer. `motive_other` and `ability_other` are
    # overwritten in place with their theme label.
    opinion_friend = classify_free_text(opinion_friend_family),
    opinion_conv = classify_free_text(opinion_conversation),
    motive_other = classify_free_text(motive_other),
    ability_other = classify_free_text(ability_other)
    
    #treatment = treatment_text,
    
  ) %>% 
  select(starts_with(c("motive_reason_", "ability_reason_", "best_treat_")), 
       covid_already, no_motive, motive_elaboration, no_ability, 
       ability_elaboration, against_beliefs, no_benefits, risky, no_time, no_money, no_availability,
       bad_side_effects, lack_of_testing, not_trust_pharma,
       covid_not_dangerous, vaccines_dont_work, freedom_to_choose,
       religious_reasons, no_time_off_work, no_time_to_research,
       no_childcare, no_cash, no_insurance, travel_costs, no_vax_left, too_far,info_source, 
       info_confidence, info_confidence_high, want_link, want_answer, self_reflection,
       age, education, religiosity, location, black_or_african, vaccinated, nigeria, kenya, ghana, south_africa, 
       best_treatment, opinion_conv_sentiment, opinion_friend, opinion_conv, 
       motive_other, ability_other, 
       post_want_vax) %>%
  # info_confidence is only needed to derive info_confidence_high above
  select(!info_confidence & !motive_reason_consistency) %>%
  # Dummy-code each categorical theme. The most frequent level is dropped as the
  # reference category, and the source column is removed afterwards.
  dummy_cols(select_columns = "info_source", remove_most_frequent_dummy = TRUE, remove_selected_columns = TRUE, ignore_na = TRUE) %>%
  dummy_cols(select_columns = "opinion_friend", remove_most_frequent_dummy = TRUE, remove_selected_columns = TRUE, ignore_na = TRUE) %>%
  dummy_cols(select_columns = "opinion_conv", remove_most_frequent_dummy = TRUE, remove_selected_columns = TRUE, ignore_na = TRUE) %>%
  dummy_cols(select_columns = "motive_other", remove_most_frequent_dummy = TRUE, remove_selected_columns = TRUE, ignore_na = TRUE) %>%
  dummy_cols(select_columns = "ability_other", remove_most_frequent_dummy = TRUE, remove_selected_columns = TRUE, ignore_na = TRUE) %>%
  #dummy_cols(select_columns = "treatment", remove_most_frequent_dummy = TRUE, remove_selected_columns = TRUE, ignore_na = TRUE) %>%
  # Fill remaining NAs with 0 in columns whose names start with "info", "motive"
  # or "treatment".
  # NOTE(review): the opinion_* and ability_other_* dummies are not covered by
  # these prefixes, so any NAs in them are left as-is -- confirm this is intended.
  mutate(across(starts_with(c("info", "motive", "treatment")), ~ replace_na(.x, 0))) %>% 
  relocate(starts_with("info_source"), .before = info_confidence_high)

# Unvaccinated subsets of the feature table and of the completed responses.
df_features_unvax <- filter(df_features, vaccinated == 0)

df_unvax <- filter(df, vax_status_num == 0)

2.1 Summary Statistics for Cleaned Features

Summary statistics for unvaxxed respondents only:

# Interactive table of numeric summaries for the unvaccinated feature set,
# with column names converted to title case for display.
datatable(
  papeR::summarize_numeric(clean_names(df_features_unvax, case = "title")),
  options = list(
    pageLength = 10,
    columnDefs = list(list(orderable = TRUE, targets = 0))
  )
)

2.2 Summary Statistics by Motive Impediment

The heatmap below shows the mean value of each feature (along the y-axis) for each of the motive impediments (along the x-axis). Each feature is standardized before aggregating, so that its mean across all respondents (pooling the motive impediments) is 0. Hence, for each feature, reds on the plot show values below that overall mean, whereas greens on the plot show values above it.

Note: If motive_main == “other” (the user typed free text), then they did not answer motive_reason. This is why there is a block of gray for this column for the “lack motive” features.

2.2.1 Heat Maps

# Heat map of standardized feature means (with standard deviations) for each
# main motive impediment among unvaccinated respondents.
#
# Reads the globals `df_features_unvax` and `df_unvax`. `motive_main` is copied
# from `df_unvax` by row position, so both are assumed to hold the same
# respondents in the same order -- TODO confirm upstream.
#
# Args:
#   group: a tidyselect helper call such as `starts_with("best_treat_")`
#     choosing the feature columns to plot. It is evaluated lazily inside
#     `select()`, after `best_treatment`, `vaccinated` and the motive
#     indicator columns have been dropped.
#
# Returns:
#   A ggplot object with one tile per (impediment, feature) pair, filled by
#   the group mean of the z-scored feature and labelled "mean (sd)".
motive_reason_viz <- function(group){

  # Impediments in the left-to-right order used on the x-axis.
  motive_levels <- c("beliefs", "benefit", "risk", "misunderstood", "text answer")

  # observations by group; fixing the levels gives a count of 0, instead of a
  # "subscript out of bounds" error, for an impediment nobody selected
  n_by_group <- table(factor(df_unvax$motive_main, levels = motive_levels))

  df_features_unvax %>% 
    mutate(motive_main = df_unvax$motive_main) %>%
    select(!best_treatment & !no_motive & !risky & !against_beliefs & !no_benefits & !motive_elaboration & !vaccinated) %>%
    select(motive_main, all_of(group)) %>%
    filter(!is.na(motive_main)) %>%
    # z-score every selected feature over the retained respondents
    mutate_at(vars(-motive_main), ~ scale(.) %>% as.vector()) %>% 
    group_by(motive_main) %>% 
    summarise_all(list(sep_mean = ~ mean(., na.rm = TRUE), sep_sd = ~ sd(., na.rm = TRUE))) %>%
    pivot_longer(cols = !motive_main, names_to = c("name")) %>%
    # "_sep_" marks where the feature name ends and the statistic name begins
    separate(name, into = c("name", "value_type"), sep = "_sep_") %>%
    mutate(
      motive_main = factor(motive_main),
      name = str_to_title(str_replace_all(string = name, pattern  = "_", replacement = " ")) %>% fct_inorder() %>% fct_rev()
    ) %>%
    pivot_wider(names_from = value_type, values_from = value) %>% 
    mutate(
      mean = round(mean, 2),
      mean_char = as.character(mean),
      # clamp the fill value to [-0.4, 0.4], inside the fill scale's limits;
      # the text label records that the value was cut off
      mean_char = if_else(mean > 0.4, "0.4+", mean_char),
      mean = if_else(mean > 0.4, 0.4, mean),
      mean_char = if_else(mean < -0.4, "-0.4", mean_char),
      mean = if_else(mean < -0.4, -0.4, mean),
      sd = round(sd, 2),
      label_str = str_c(mean_char, " (", sd, ")"),   
      # explicit levels pin each impediment to a fixed x position (1..5)
      motive_main = factor(str_to_title(str_replace_all(string = motive_main, pattern = " ", replacement = "\n")), levels = c("Beliefs", "Benefit", "Risk", "Misunderstood", "Text\nAnswer"))
    ) %>% 
    ggplot() +
    geom_tile(aes(as.numeric(motive_main), name, fill = mean), color = "white", lwd = .5) +
    geom_text(aes(as.numeric(motive_main), name, label = label_str), size = 2.75) +
    scale_fill_gradient2(
      low = "red",
      mid = "white",
      high = "green",
      midpoint = 0,
      limits = c(-0.5, 0.5)
    ) +
    scale_x_continuous(sec.axis = dup_axis(), breaks = 1:5, 
                       limits = c(0.5, 5.5),
                       expand = c(0,0),
                       labels = c(paste("Beliefs\nN=", n_by_group[["beliefs"]]), 
                                  paste("Benefit\nN=", n_by_group[["benefit"]]), 
                                  paste("Risk\nN=", n_by_group[["risk"]]), 
                                  paste("Misunderstood\nN=", n_by_group[["misunderstood"]]), 
                                  paste("Text Answer\nN=", n_by_group[["text answer"]]))) +
    theme_minimal() +
    theme(legend.position = "bottom",                      
          axis.title.y = element_text(angle = 0)) +
    labs(
      x = "Motive Impediment", y = "Feature",
      fill = "Means (std)"
    ) 
}

2.2.1.1 Motive

# Motive-impediment heat map for columns whose names start with any of the
# motive-related prefixes below.
motive_reason_viz(
  group = starts_with(c(
    "motive_reason_", "no_motive", "motive_elaboration", "motive_other",
    "against_beliefs", "no_benefits", "risky", "bad_side_effects",
    "lack_of_testing", "not_trust_pharma", "covid_not_dangerous",
    "vaccines_dont_work", "freedom_to_choose", "religious_reasons"
  ))
)

2.2.1.2 Ability

# Motive-impediment heat map for columns whose names start with any of the
# ability-related prefixes below.
motive_reason_viz(
  group = starts_with(c(
    "ability_reason_", "no_ability", "ability_elaboration", "ability_other",
    "no_time", "no_money", "no_availability", "no_time_off_work",
    "no_time_to_research", "no_childcare", "no_cash", "no_insurance",
    "travel_costs", "no_vax_left", "too_far"
  ))
)

2.2.1.3 Best Treatment

# Motive-impediment heat map for columns whose names start with "best_treat_".
motive_reason_viz(
  group = starts_with("best_treat_")
)

2.2.1.4 Opinion Friend/Conversation

# Motive-impediment heat map for the friend/conversation opinion columns.
motive_reason_viz(
  group = starts_with(c(
    "opinion_friend", "opinion_conv", "opinion_conv_sentiment"
  ))
)

2.2.1.5 Demographics/Info Source

# Motive-impediment heat map for columns whose names start with any of the
# demographic / information-source prefixes below.
motive_reason_viz(
  group = starts_with(c(
    "nigeria", "kenya", "ghana", "south_africa", "age", "education",
    "religiosity", "location", "black_or_african", "post_want_vax",
    "covid_already", "want_link", "want_answer", "self_reflection",
    "info_source"
  ))
)

2.2.2 Tables

2.2.2.1 Against Beliefs

# Numeric feature summaries for unvaccinated respondents with
# `Against Beliefs` equal to 1, shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Against Beliefs` == 1) %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.2.2.2 No Benefits

# Numeric feature summaries for unvaccinated respondents with
# `No Benefits` equal to 1, shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`No Benefits` == 1) %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.2.2.3 Risky

# Numeric feature summaries for unvaccinated respondents with
# `Risky` equal to 1, shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Risky` == 1) %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.3 Summary Statistics by Ability Impediment

The heatmap below shows the mean values of features (along the y-axis) for each of the ability impediments (along the x-axis). Each feature is standardized before aggregating so that its overall mean across respondents is 0. Hence, for each feature, reds on the plot show ability impediments whose mean is below the overall mean, whereas greens show those whose mean is above it.

2.3.1 Heatmaps

# Heat map of standardized feature means (with standard deviations) for each
# main ability impediment among unvaccinated respondents.
#
# Reads the globals `df_features_unvax` and `df_unvax`. `ability_main` is
# copied from `df_unvax` by row position, so both are assumed to hold the same
# respondents in the same order -- TODO confirm upstream.
#
# Args:
#   group: a tidyselect helper call such as `starts_with("best_treat_")`
#     choosing the feature columns to plot. It is evaluated lazily inside
#     `select()`, after `best_treatment`, `vaccinated` and the ability
#     indicator columns have been dropped.
#
# Returns:
#   A ggplot object with one tile per (impediment, feature) pair, filled by
#   the group mean of the z-scored feature and labelled "mean (sd)".
ability_reason_viz <- function(group){

  # Impediments in the left-to-right order used on the x-axis.
  ability_levels <- c("availability", "money", "time", "misunderstood", "text answer")

  # observations by group; fixing the levels gives a count of 0, instead of a
  # "subscript out of bounds" error, for an impediment nobody selected
  n_by_group <- table(factor(df_unvax$ability_main, levels = ability_levels))

  df_features_unvax %>% 
    mutate(ability_main = df_unvax$ability_main) %>%
    select(!best_treatment & !no_availability & !no_time & !no_money & !no_ability & !ability_elaboration & !vaccinated) %>%
    # all_of() marks `group` as an external selection rather than a column
    # that happens to be called "group"
    select(ability_main, all_of(group)) %>%
    filter(!is.na(ability_main)) %>%
    # z-score every selected feature over the retained respondents
    mutate_at(vars(-ability_main), ~ scale(.) %>% as.vector()) %>% 
    group_by(ability_main) %>%
    summarise_all(list(sep_mean = ~ mean(., na.rm = TRUE), 
                       sep_sd = ~ sd(., na.rm = TRUE))) %>%
    pivot_longer(cols = !ability_main, names_to = c("name")) %>%
    # "_sep_" marks where the feature name ends and the statistic name begins
    separate(name, into = c("name", "value_type"), sep = "_sep_") %>%
    mutate(
      ability_main = factor(ability_main),
      name = str_to_title(str_replace_all(string = name, pattern  = "_", replacement = " ")) %>% fct_inorder() %>% fct_rev()
    ) %>%
    pivot_wider(names_from = value_type, values_from = value) %>% 
    mutate(
      mean = round(mean, 2),
      mean_char = as.character(mean),
      # clamp the fill value to [-0.4, 0.4], inside the fill scale's limits;
      # the text label records that the value was cut off
      mean_char = if_else(mean > 0.4, "0.4+", mean_char),
      mean = if_else(mean > 0.4, 0.4, mean),
      mean_char = if_else(mean < -0.4, "-0.4", mean_char),
      mean = if_else(mean < -0.4, -0.4, mean),
      sd = round(sd, 2),
      label_str = str_c(mean_char, " (", sd, ")"),
      # explicit levels pin each impediment to a fixed x position (1..5)
      ability_main = factor(str_to_title(str_replace_all(string = ability_main, pattern = " ", replacement = "\n")),  levels = c("Availability", "Money", "Time", "Misunderstood", "Text\nAnswer"))
    ) %>% 
    ggplot() +
    geom_tile(aes(as.numeric(ability_main), name, fill = mean), color = "white", lwd = .5, width = 1, height = 1) +
    geom_text(aes(as.numeric(ability_main), name, label = label_str), size = 2.75) +
    scale_fill_gradient2(
      low = "red",
      mid = "white",
      high = "green",
      midpoint = 0,
      limits = c(-0.5, 0.5)
    ) +
    scale_x_continuous(sec.axis = dup_axis(), breaks = 1:5,
                       limits = c(0.5, 5.5),
                       expand = c(0,0),
                       labels = c(paste("Availability\nN=", n_by_group[["availability"]]), 
                                  paste("Money\nN=", n_by_group[["money"]]), 
                                  paste("Time\nN=", n_by_group[["time"]]), 
                                  paste("Misunderstood\nN=", n_by_group[["misunderstood"]]), 
                                  paste("Text Answer\nN=", n_by_group[["text answer"]]))) +
    theme_minimal() +
    theme(legend.position = "bottom", 
          axis.title.y = element_text(angle = 0)) +
    labs(
      x = "Ability Impediment", y = "Feature",
      fill = "Means (std)"
    ) 
}

2.3.1.1 Motive

# Ability-impediment heat map for columns whose names start with any of the
# motive-related prefixes below.
ability_reason_viz(
  group = starts_with(c(
    "motive_reason_", "no_motive", "motive_elaboration", "motive_other",
    "against_beliefs", "no_benefits", "risky", "bad_side_effects",
    "lack_of_testing", "not_trust_pharma", "covid_not_dangerous",
    "vaccines_dont_work", "freedom_to_choose", "religious_reasons"
  ))
)

2.3.1.2 Ability

# Ability-impediment heat map for columns whose names start with any of the
# ability-related prefixes below.
ability_reason_viz(
  group = starts_with(c(
    "ability_reason_", "no_ability", "ability_elaboration", "ability_other",
    "no_time", "no_money", "no_availability", "no_time_off_work",
    "no_time_to_research", "no_childcare", "no_cash", "no_insurance",
    "travel_costs", "no_vax_left", "too_far"
  ))
)

2.3.1.3 Best Treatment

# Ability-impediment heat map for columns whose names start with "best_treat_".
ability_reason_viz(
  group = starts_with("best_treat_")
)

2.3.1.4 Opinion Friend/Conversation

# Ability-impediment heat map for the friend/conversation opinion columns.
ability_reason_viz(
  group = starts_with(c(
    "opinion_friend", "opinion_conv", "opinion_conv_sentiment"
  ))
)

2.3.1.5 Demographics/Info Source

# Ability-impediment heat map for columns whose names start with any of the
# demographic / information-source prefixes below.
ability_reason_viz(
  group = starts_with(c(
    "nigeria", "kenya", "ghana", "south_africa", "age", "education",
    "religiosity", "location", "black_or_african", "post_want_vax",
    "covid_already", "want_link", "want_answer", "self_reflection",
    "info_source"
  ))
)

2.3.2 Tables

2.3.2.1 No Availability

# Numeric feature summaries for unvaccinated respondents with
# `No Availability` equal to 1, shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`No Availability` == 1) %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.3.2.2 No Money

# Numeric feature summaries for unvaccinated respondents with
# `No Money` equal to 1, shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`No Money` == 1) %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.3.2.3 No Time

# Numeric feature summaries for unvaccinated respondents with
# `No Time` equal to 1, shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`No Time` == 1) %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.4 Summary Statistics by Best Treatment

The heatmap below shows the mean values of features (along the y-axis) for each of the best treatments (along the x-axis). Each feature is standardized before aggregating so that its overall mean across respondents is 0. Hence, for each feature, reds on the plot show best treatments whose mean is below the overall mean, whereas greens show those whose mean is above it.

2.4.1 Heatmaps

# Heat map of standardized feature means (with standard deviations) for each
# self-reported best treatment among unvaccinated respondents.
#
# Reads the global `df_features_unvax`. Respondents whose `best_treatment` is
# "Text answer" or missing are excluded.
#
# Args:
#   group: a tidyselect helper call such as `starts_with("best_treat_")`
#     choosing the feature columns to plot. It is evaluated lazily inside
#     `select()`, after `vaccinated` has been dropped.
#
# Returns:
#   A ggplot object with one tile per (treatment, feature) pair, filled by
#   the group mean of the z-scored feature and labelled "mean (sd)".
best_treatment_viz <- function(group){

  # Treatments in the left-to-right order of the hard-coded axis labels below.
  # Pinning the levels keeps every tile under its own label even when a
  # treatment has no respondents; a value outside this list gets no x position
  # and is dropped by ggplot with a warning.
  treatment_levels <- c(
    "Appointment", "Convenient to vaccinate", "Family/friend endorses it",
    "Job/school required it", "More safety evidence", "No, something else",
    "Nothing", "Reminders", "Rewards for vaccinating", "Time off work"
  )

  # observations by group; absent treatments count as 0 instead of raising
  # "subscript out of bounds"
  n_by_group <- table(factor(df_features_unvax$best_treatment, levels = treatment_levels))

  df_features_unvax %>% 
    filter(!(best_treatment %in% c("Text answer", NA))) %>%
    select(!vaccinated) %>%
    # all_of() marks `group` as an external selection rather than a column
    # that happens to be called "group"
    select(best_treatment, all_of(group)) %>%
    # z-score every selected feature over the retained respondents
    mutate_at(vars(-best_treatment), ~ scale(.) %>% as.vector()) %>% 
    group_by(best_treatment) %>% 
    summarise_all(list(sep_mean = ~ mean(., na.rm = TRUE), sep_sd = ~ sd(., na.rm = TRUE))) %>%
    pivot_longer(cols = !best_treatment, names_to = c("name")) %>%
    # "_sep_" marks where the feature name ends and the statistic name begins
    separate(name, into = c("name", "value_type"), sep = "_sep_") %>%
    mutate(
      name = str_to_title(str_replace_all(string = name, pattern  = "_", replacement = " ")) %>% fct_inorder() %>% fct_rev()
    ) %>%
    pivot_wider(names_from = value_type, values_from = value) %>% 
    mutate(
      mean = round(mean, 2),
      mean_char = as.character(mean),
      # clamp the fill value to [-0.4, 0.4], inside the fill scale's limits;
      # the text label records that the value was cut off
      mean_char = if_else(mean > 0.4, "0.4+", mean_char),
      mean = if_else(mean > 0.4, 0.4, mean),
      mean_char = if_else(mean < -0.4, "-0.4", mean_char),
      mean = if_else(mean < -0.4, -0.4, mean),
      sd = round(sd, 2),
      label_str = str_c(mean_char, " (", sd, ")"),
      best_treatment = factor(best_treatment, levels = treatment_levels)
    ) %>% 
    ggplot() +
    geom_tile(aes(as.numeric(best_treatment), name, fill = mean), color = "white", lwd = .5) +
    geom_text(aes(as.numeric(best_treatment), name, label = label_str), size = 2.75) +
    scale_fill_gradient2(
      low = "red",
      mid = "white",
      high = "green",
      midpoint = 0,
      limits = c(-0.5, 0.5)
    ) +
    scale_x_continuous(breaks = 1:10, 
                       sec.axis = dup_axis(),
                       expand = c(0,0),
                       limits = c(0.5, 10.5),
                       labels = c(paste("Appointment\nN=", n_by_group[["Appointment"]]),
                                  paste("Convenient\nto\nvaccinate\nN=", n_by_group[["Convenient to vaccinate"]]),
                                  paste("Family/friend\nendorses\nit\nN=", n_by_group[["Family/friend endorses it"]]),
                                  paste("Job/school\nrequired\nN=", n_by_group[["Job/school required it"]]),
                                  paste("More\nsafety\nevidence\nN=", n_by_group[["More safety evidence"]]),
                                  paste("No,\nsomething\nelse\nN=", n_by_group[["No, something else"]]),
                                  paste("Nothing\nN=", n_by_group[["Nothing"]]),
                                  paste("Reminders\nN=", n_by_group[["Reminders"]]),
                                  paste("Rewards\nfor\nvaccinating\nN=", n_by_group[["Rewards for vaccinating"]]),
                                  paste("Time\noff\nwork\nN=", n_by_group[["Time off work"]]))) + 
    theme_minimal() +
    theme(legend.position = "bottom",
          axis.title.y = element_text(angle = 0)) +
    labs(
      x = "Best Treatment", y = "Feature",
      fill = "Means (std)"
    ) 
}

2.4.1.1 Motive

# Best-treatment heat map for columns whose names start with any of the
# motive-related prefixes below.
best_treatment_viz(
  group = starts_with(c(
    "motive_reason_", "no_motive", "motive_elaboration", "motive_other",
    "against_beliefs", "no_benefits", "risky", "bad_side_effects",
    "lack_of_testing", "not_trust_pharma", "covid_not_dangerous",
    "vaccines_dont_work", "freedom_to_choose", "religious_reasons"
  ))
)

2.4.1.2 Ability

# Best-treatment heat map for columns whose names start with any of the
# ability-related prefixes below.
best_treatment_viz(
  group = starts_with(c(
    "ability_reason_", "no_ability", "ability_elaboration", "ability_other",
    "no_time", "no_money", "no_availability", "no_time_off_work",
    "no_time_to_research", "no_childcare", "no_cash", "no_insurance",
    "travel_costs", "no_vax_left", "too_far"
  ))
)

2.4.1.3 Best Treatment

# Best-treatment heat map for columns whose names start with "best_treat_".
best_treatment_viz(
  group = starts_with("best_treat_")
)

2.4.1.4 Opinion Friend/Conversation

# Best-treatment heat map for the friend/conversation opinion columns.
best_treatment_viz(
  group = starts_with(c(
    "opinion_friend", "opinion_conv", "opinion_conv_sentiment"
  ))
)

2.4.1.5 Demographics/Info Source

# Best-treatment heat map for columns whose names start with any of the
# demographic / information-source prefixes below.
best_treatment_viz(
  group = starts_with(c(
    "nigeria", "kenya", "ghana", "south_africa", "age", "education",
    "religiosity", "location", "black_or_african", "post_want_vax",
    "covid_already", "want_link", "want_answer", "self_reflection",
    "info_source"
  ))
)

2.4.2 Tables

2.4.2.1 Family/friend endorses it

# Numeric feature summaries for unvaccinated respondents whose best treatment
# is "Family/friend endorses it", shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Best Treatment` == "Family/friend endorses it") %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.4.2.2 More safety evidence

# Numeric feature summaries for unvaccinated respondents whose best treatment
# is "More safety evidence", shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Best Treatment` == "More safety evidence") %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.4.2.3 Reminders

# Numeric feature summaries for unvaccinated respondents whose best treatment
# is "Reminders", shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Best Treatment` == "Reminders") %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.4.2.4 Rewards for vaccinating

# Numeric feature summaries for unvaccinated respondents whose best treatment
# is "Rewards for vaccinating", shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Best Treatment` == "Rewards for vaccinating") %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.4.2.5 Convenient to vaccinate

# Numeric feature summaries for unvaccinated respondents whose best treatment
# is "Convenient to vaccinate", shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Best Treatment` == "Convenient to vaccinate") %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.4.2.6 Time off work

# Numeric feature summaries for unvaccinated respondents whose best treatment
# is "Time off work", shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Best Treatment` == "Time off work") %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.4.2.7 Job/school required it

# Numeric feature summaries for unvaccinated respondents whose best treatment
# is "Job/school required it", shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Best Treatment` == "Job/school required it") %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.4.2.8 Appointment

# Numeric feature summaries for unvaccinated respondents whose best treatment
# is "Appointment", shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Best Treatment` == "Appointment") %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.4.2.9 No, something else

# Numeric feature summaries for unvaccinated respondents whose best treatment
# is "No, something else", shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Best Treatment` == "No, something else") %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.4.2.10 Nothing

# Numeric feature summaries for unvaccinated respondents whose best treatment
# is "Nothing", shown as a paginated table.
df_features_unvax %>%
  clean_names(case = "title") %>%
  filter(`Best Treatment` == "Nothing") %>%
  papeR::summarize_numeric() %>%
  datatable(
    options = list(
      pageLength = 10,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

2.5 Other Characterizations

2.5.1 Features by Country

# Heat map of standardized feature means (with standard deviations) by
# respondent country. `country_answer` is copied from `df_unvax` by row
# position, so `df_features_unvax` and `df_unvax` are assumed to hold the same
# respondents in the same order -- TODO confirm upstream.

# Respondent counts per country. The fixed levels give a count of 0 (not a
# "subscript out of bounds" error) for an absent country and define the x-axis
# order used for the tiles below.
n_by_group <- table(factor(df_unvax$country_answer, levels = c("ghana", "kenya", "nigeria", "south africa")))

df_features_unvax %>% 
  mutate(country_answer = df_unvax$country_answer) %>%
  select(!best_treatment & !vaccinated & !ghana & !south_africa & !kenya & !nigeria) %>%
  filter(!is.na(country_answer)) %>%
  # z-score every remaining feature over the retained respondents
  mutate_at(vars(-country_answer), ~ scale(.) %>% as.vector()) %>% 
  group_by(country_answer) %>% 
  summarise_all(list(sep_mean = ~ mean(., na.rm = TRUE), sep_sd = ~ sd(., na.rm = TRUE))) %>%
  pivot_longer(cols = !country_answer, names_to = c("name")) %>%
  # "_sep_" marks where the feature name ends and the statistic name begins
  separate(name, into = c("name", "value_type"), sep = "_sep_") %>%
  mutate(
    # same levels as the counts, so each tile sits under its own axis label
    # even if a country is missing from the data
    country_answer = factor(country_answer, levels = names(n_by_group)),
    name = str_to_title(str_replace_all(string = name, pattern  = "_", replacement = " ")) %>% fct_inorder() %>% fct_rev()
  ) %>%
  pivot_wider(names_from = value_type, values_from = value) %>% 
  mutate(
    mean = round(mean, 2),
    mean_char = as.character(mean),
    # clamp the fill value to [-0.4, 0.4], inside the fill scale's limits;
    # the text label records that the value was cut off
    mean_char = if_else(mean > 0.4, "0.4+", mean_char),
    mean = if_else(mean > 0.4, 0.4, mean),
    mean_char = if_else(mean < -0.4, "-0.4", mean_char),
    mean = if_else(mean < -0.4, -0.4, mean),
    sd = round(sd, 2),
    label_str = str_c(mean_char, " (", sd, ")")
  ) %>% 
  ggplot() +
  geom_tile(aes(as.numeric(country_answer), name, fill = mean), color = "white", lwd = .5) +
  geom_text(aes(as.numeric(country_answer), name, label = label_str), size = 2.75) +
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "green",
    midpoint = 0,
    limits = c(-0.5, 0.5)
  ) +
  scale_x_continuous(breaks = 1:4, sec.axis = dup_axis(),
                     expand = c(0,0),
                     limits = c(0.5, 4.5),
                     labels = c(paste("Ghana\nN=", n_by_group[["ghana"]]), 
                                paste("Kenya\nN=", n_by_group[["kenya"]]), 
                                paste("Nigeria\nN=", n_by_group[["nigeria"]]), 
                                paste("South Africa\nN=", n_by_group[["south africa"]]))) + 
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.title.y = element_text(angle = 0)) +
  labs(
    x = "Country", y = "Feature",
    fill = "Means (std)"
  )

2.5.2 Treatment - Random Typing

# Respondent counts for each value of `treatment_random typing`, with a
# separate bucket for missing values when there are any.
n_by_group <- table(
  select(df_features_unvax, `treatment_random typing`),
  useNA = "ifany"
)

## Error: Can't subset columns that don't exist.
## x Column `treatment_random typing` doesn't exist.

# Heat map of standardized feature means (with standard deviations) split by
# the 0/1 random-typing treatment flag. Axis counts come from `n_by_group`.
# NOTE(review): the rendered output for this section reports that
# `treatment_random typing` does not exist in `df_features_unvax`, so this
# chunk currently stops at the first `mutate()` -- TODO confirm which column
# now carries this flag.
df_features_unvax %>% 
  mutate(random_typing = `treatment_random typing`) %>%
  select(!best_treatment & !starts_with("treatment_") & !vaccinated) %>%
  filter(!is.na(random_typing)) %>%
  # z-score every remaining feature over the retained respondents
  mutate_at(vars(-random_typing), ~ scale(.) %>% as.vector()) %>% 
  group_by(random_typing) %>% 
  summarise_all(list(sep_mean = ~ mean(., na.rm = TRUE), sep_sd = ~ sd(., na.rm = TRUE))) %>%
  pivot_longer(cols = !random_typing, names_to = c("name")) %>%
  # "_sep_" marks where the feature name ends and the statistic name begins
  separate(name, into = c("name", "value_type"), sep = "_sep_") %>%
  mutate(
    # fixed levels keep 0 at x = 1 and 1 at x = 2 even if one value is absent
    random_typing = factor(random_typing, levels = c(0, 1)),
    name = str_to_title(str_replace_all(string = name, pattern  = "_", replacement = " ")) %>% fct_inorder() %>% fct_rev()
  ) %>%
  pivot_wider(names_from = value_type, values_from = value) %>% 
  mutate(
    mean = round(mean, 2),
    mean_char = as.character(mean),
    # clamp the fill value to [-0.4, 0.4], inside the fill scale's limits;
    # the text label records that the value was cut off
    mean_char = if_else(mean > 0.4, "0.4+", mean_char),
    mean = if_else(mean > 0.4, 0.4, mean),
    mean_char = if_else(mean < -0.4, "-0.4", mean_char),
    mean = if_else(mean < -0.4, -0.4, mean),
    sd = round(sd, 2),
    label_str = str_c(mean_char, " (", sd, ")")
  ) %>% 
  ggplot() +
  geom_tile(aes(as.numeric(random_typing), name, fill = mean), color = "white", lwd = .5) +
  geom_text(aes(as.numeric(random_typing), name, label = label_str), size = 2.75) +
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "green",
    midpoint = 0,
    limits = c(-0.5, 0.5)
  ) +
  scale_x_continuous(breaks = 1:2, sec.axis = dup_axis(),
                     expand = c(0,0),
                     limits = c(0.5, 2.5),
                     labels = c(paste("No Random Typing\nN=", n_by_group[["0"]]),
                                paste("Random Typing\nN=", n_by_group[["1"]]))) + 
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.title.y = element_text(angle = 0)) +
  labs(
    x = "Treatment: Random Typing", y = "Feature",
    fill = "Means (std)"
  )

## Error: Problem with `mutate()` column `random_typing`.
## ℹ `random_typing = treatment_random typing`.
## x object 'treatment_random typing' not found

2.5.3 Motive Reason - Nonsensical

# Respondent counts for each value of `lack_motive_nonsensical`, with a
# separate bucket for missing values when there are any.
n_by_group <- table(
  select(df_features_unvax, lack_motive_nonsensical),
  useNA = "ifany"
)

## Error: Can't subset columns that don't exist.
## x Column `lack_motive_nonsensical` doesn't exist.

# Heat map of standardized feature means (with standard deviations) split by
# the 0/1 "nonsensical motive reason" flag. Axis counts come from `n_by_group`.
# NOTE(review): the rendered output for this section reports that
# `lack_motive_nonsensical` does not exist in `df_features_unvax`, so this
# chunk currently stops at the first `mutate()` -- TODO confirm which column
# now carries this flag.
df_features_unvax %>% 
  mutate(motive_non_sensical = lack_motive_nonsensical) %>%
  select(!best_treatment & !starts_with("lack_motive_") & !vaccinated & !no_motive) %>%
  filter(!is.na(motive_non_sensical)) %>%
  # z-score every remaining feature over the retained respondents
  mutate_at(vars(-motive_non_sensical), ~ scale(.) %>% as.vector()) %>% 
  group_by(motive_non_sensical) %>% 
  summarise_all(list(sep_mean = ~ mean(., na.rm = TRUE), sep_sd = ~ sd(., na.rm = TRUE))) %>%
  pivot_longer(cols = !motive_non_sensical, names_to = c("name")) %>%
  # "_sep_" marks where the feature name ends and the statistic name begins
  separate(name, into = c("name", "value_type"), sep = "_sep_") %>%
  mutate(
    # fixed levels keep 0 at x = 1 and 1 at x = 2 even if one value is absent
    motive_non_sensical = factor(motive_non_sensical, levels = c(0, 1)),
    name = str_to_title(str_replace_all(string = name, pattern  = "_", replacement = " ")) %>% fct_inorder() %>% fct_rev()
  ) %>%
  pivot_wider(names_from = value_type, values_from = value) %>% 
  mutate(
    mean = round(mean, 2),
    mean_char = as.character(mean),
    # clamp the fill value to [-0.4, 0.4], inside the fill scale's limits;
    # the text label records that the value was cut off
    mean_char = if_else(mean > 0.4, "0.4+", mean_char),
    mean = if_else(mean > 0.4, 0.4, mean),
    mean_char = if_else(mean < -0.4, "-0.4", mean_char),
    mean = if_else(mean < -0.4, -0.4, mean),
    sd = round(sd, 2),
    label_str = str_c(mean_char, " (", sd, ")")
  ) %>% 
  ggplot() +
  geom_tile(aes(as.numeric(motive_non_sensical), name, fill = mean), color = "white", lwd = .5) +
  geom_text(aes(as.numeric(motive_non_sensical), name, label = label_str), size = 2.75) +
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "green",
    midpoint = 0,
    limits = c(-0.5, 0.5)
  ) +
  scale_x_continuous(breaks = 1:2, sec.axis = dup_axis(),
                     expand = c(0,0),
                     limits = c(0.5, 2.5),
                     # both labels carry "\nN=" so the count reads the same on
                     # each side of the axis
                     labels = c(paste("Not Nonsensical\nN=", n_by_group[["0"]]), 
                                paste("Nonsensical\nN=", n_by_group[["1"]]))) + 
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.title.y = element_text(angle = 0)) +
  labs(
    x = "Motive Reason: Nonsensical", y = "Feature",
    fill = "Means (std)"
  )

## Error: Problem with `mutate()` column `motive_non_sensical`.
## ℹ `motive_non_sensical = lack_motive_nonsensical`.
## x object 'lack_motive_nonsensical' not found

2.5.4 Needles/Pain

# Respondent counts for each value of `lack_motive_needles_injection`, with a
# separate bucket for missing values when there are any.
n_by_group <- table(
  select(df_features_unvax, lack_motive_needles_injection),
  useNA = "ifany"
)

## Error: Can't subset columns that don't exist.
## x Column `lack_motive_needles_injection` doesn't exist.

# Heat map of standardized feature means (with standard deviations) split by
# the 0/1 "needles / injection" motive flag. Axis counts come from
# `n_by_group`.
# NOTE(review): the rendered output for this section reports that
# `lack_motive_needles_injection` does not exist in `df_features_unvax`, so
# this chunk currently stops at the first `mutate()` -- TODO confirm which
# column now carries this flag.
df_features_unvax %>% 
  mutate(needles = lack_motive_needles_injection) %>%
  select(!best_treatment & !starts_with("lack_motive_") & !vaccinated & !no_motive) %>%
  filter(!is.na(needles)) %>%
  # z-score every remaining feature over the retained respondents
  mutate_at(vars(-needles), ~ scale(.) %>% as.vector()) %>% 
  group_by(needles) %>% 
  summarise_all(list(sep_mean = ~ mean(., na.rm = TRUE), sep_sd = ~ sd(., na.rm = TRUE))) %>%
  pivot_longer(cols = !needles, names_to = c("name")) %>%
  # "_sep_" marks where the feature name ends and the statistic name begins
  separate(name, into = c("name", "value_type"), sep = "_sep_") %>%
  mutate(
    # fixed levels keep 0 at x = 1 and 1 at x = 2 even if one value is absent
    needles = factor(needles, levels = c(0, 1)),
    name = str_to_title(str_replace_all(string = name, pattern  = "_", replacement = " ")) %>% fct_inorder() %>% fct_rev()
  ) %>%
  pivot_wider(names_from = value_type, values_from = value) %>% 
  mutate(
    mean = round(mean, 2),
    mean_char = as.character(mean),
    # clamp the fill value to [-0.4, 0.4], inside the fill scale's limits;
    # the text label records that the value was cut off
    mean_char = if_else(mean > 0.4, "0.4+", mean_char),
    mean = if_else(mean > 0.4, 0.4, mean),
    mean_char = if_else(mean < -0.4, "-0.4", mean_char),
    mean = if_else(mean < -0.4, -0.4, mean),
    sd = round(sd, 2),
    label_str = str_c(mean_char, " (", sd, ")")
  ) %>% 
  ggplot() +
  geom_tile(aes(as.numeric(needles), name, fill = mean), color = "white", lwd = .5) +
  geom_text(aes(as.numeric(needles), name, label = label_str), size = 2.75) +
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "green",
    midpoint = 0,
    limits = c(-0.5, 0.5)
  ) +
  scale_x_continuous(breaks = 1:2, sec.axis = dup_axis(),
                     expand = c(0,0),
                     limits = c(0.5, 2.5),
                     # "\nN=" puts the count on its own line, as in the other
                     # heat maps of this report
                     labels = c(paste("Not Needles/Pain\nN=", n_by_group[["0"]]), 
                                paste("Needles/Pain\nN=", n_by_group[["1"]]))) + 
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.title.y = element_text(angle = 0)) +
  labs(
    x = "Motive Reason: Needles/Pain", y = "Feature",
    fill = "Means (std)"
  )

## Error: Problem with `mutate()` column `needles`.
## ℹ `needles = lack_motive_needles_injection`.
## x object 'lack_motive_needles_injection' not found

2.5.5 Motive Reason - Don’t Have Covid

# Respondent counts for each value of `lack_motive_dont_have_covid`, with a
# separate bucket for missing values when there are any.
n_by_group <- table(
  select(df_features_unvax, lack_motive_dont_have_covid),
  useNA = "ifany"
)

## Error: Can't subset columns that don't exist.
## x Column `lack_motive_dont_have_covid` doesn't exist.

# Heat map of standardized feature means (with standard deviations) split by
# the 0/1 "don't have COVID" motive flag. Axis counts come from `n_by_group`.
# NOTE(review): the rendered output for this section reports that
# `lack_motive_dont_have_covid` does not exist in `df_features_unvax`, so this
# chunk currently stops at the first `mutate()` -- TODO confirm which column
# now carries this flag.
df_features_unvax %>% 
  mutate(donthave = lack_motive_dont_have_covid) %>%
  select(!best_treatment & !starts_with("lack_motive_") & !vaccinated & !no_motive) %>%
  filter(!is.na(donthave)) %>%
  # z-score every remaining feature over the retained respondents
  mutate_at(vars(-donthave), ~ scale(.) %>% as.vector()) %>% 
  group_by(donthave) %>% 
  summarise_all(list(sep_mean = ~ mean(., na.rm = TRUE), sep_sd = ~ sd(., na.rm = TRUE))) %>%
  pivot_longer(cols = !donthave, names_to = c("name")) %>%
  # "_sep_" marks where the feature name ends and the statistic name begins
  separate(name, into = c("name", "value_type"), sep = "_sep_") %>%
  mutate(
    # fixed levels keep 0 at x = 1 and 1 at x = 2 even if one value is absent
    donthave = factor(donthave, levels = c(0, 1)),
    name = str_to_title(str_replace_all(string = name, pattern  = "_", replacement = " ")) %>% fct_inorder() %>% fct_rev()
  ) %>%
  pivot_wider(names_from = value_type, values_from = value) %>% 
  mutate(
    mean = round(mean, 2),
    mean_char = as.character(mean),
    # clamp the fill value to [-0.4, 0.4], inside the fill scale's limits;
    # the text label records that the value was cut off
    mean_char = if_else(mean > 0.4, "0.4+", mean_char),
    mean = if_else(mean > 0.4, 0.4, mean),
    mean_char = if_else(mean < -0.4, "-0.4", mean_char),
    mean = if_else(mean < -0.4, -0.4, mean),
    sd = round(sd, 2),
    label_str = str_c(mean_char, " (", sd, ")")
  ) %>% 
  ggplot() +
  geom_tile(aes(as.numeric(donthave), name, fill = mean), color = "white", lwd = .5) +
  geom_text(aes(as.numeric(donthave), name, label = label_str), size = 2.75) +
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "green",
    midpoint = 0,
    limits = c(-0.5, 0.5)
  ) +
  scale_x_continuous(breaks = 1:2, sec.axis = dup_axis(),
                     expand = c(0,0),
                     limits = c(0.5, 2.5),
                     labels = c(paste("Not Don't Have COVID\nN=", n_by_group[["0"]]), 
                                paste("Don't Have Covid\nN=", n_by_group[["1"]]))) + 
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.title.y = element_text(angle = 0)) +
  labs(
    x = "Motive Reason: Don't Have COVID", y = "Feature",
    fill = "Means (std)"
  )

## Error: Problem with `mutate()` column `donthave`.
## ℹ `donthave = lack_motive_dont_have_covid`.
## x object 'lack_motive_dont_have_covid' not found

2.5.6 Motive Reason - No Need

# Respondent counts for each value of `lack_motive_no_need`, with a separate
# bucket for missing values when there are any.
n_by_group <- table(
  select(df_features_unvax, lack_motive_no_need),
  useNA = "ifany"
)

## Error: Can't subset columns that don't exist.
## x Column `lack_motive_no_need` doesn't exist.

# Heat map of standardized feature means (with standard deviations) split by
# the 0/1 "no need" motive flag. Axis counts come from `n_by_group`.
# NOTE(review): the rendered output for this section reports that
# `lack_motive_no_need` does not exist in `df_features_unvax`, so this chunk
# currently stops at the first `mutate()` -- TODO confirm which column now
# carries this flag.
df_features_unvax %>% 
  mutate(no_need = lack_motive_no_need) %>%
  select(!best_treatment & !starts_with("lack_motive_") & !vaccinated & !no_motive) %>%
  filter(!is.na(no_need)) %>%
  # z-score every remaining feature over the retained respondents
  mutate_at(vars(-no_need), ~ scale(.) %>% as.vector()) %>% 
  group_by(no_need) %>% 
  summarise_all(list(sep_mean = ~ mean(., na.rm = TRUE), sep_sd = ~ sd(., na.rm = TRUE))) %>%
  pivot_longer(cols = !no_need, names_to = c("name")) %>%
  # "_sep_" marks where the feature name ends and the statistic name begins
  separate(name, into = c("name", "value_type"), sep = "_sep_") %>%
  mutate(
    # fixed levels keep 0 at x = 1 and 1 at x = 2 even if one value is absent
    no_need = factor(no_need, levels = c(0, 1)),
    name = str_to_title(str_replace_all(string = name, pattern  = "_", replacement = " ")) %>% fct_inorder() %>% fct_rev()
  ) %>%
  pivot_wider(names_from = value_type, values_from = value) %>% 
  mutate(
    mean = round(mean, 2),
    mean_char = as.character(mean),
    # clamp the fill value to [-0.4, 0.4], inside the fill scale's limits;
    # the text label records that the value was cut off
    mean_char = if_else(mean > 0.4, "0.4+", mean_char),
    mean = if_else(mean > 0.4, 0.4, mean),
    mean_char = if_else(mean < -0.4, "-0.4", mean_char),
    mean = if_else(mean < -0.4, -0.4, mean),
    sd = round(sd, 2),
    label_str = str_c(mean_char, " (", sd, ")")
  ) %>% 
  ggplot() +
  geom_tile(aes(as.numeric(no_need), name, fill = mean), color = "white", lwd = .5) +
  geom_text(aes(as.numeric(no_need), name, label = label_str), size = 2.75) +
  scale_fill_gradient2(
    low = "red",
    mid = "white",
    high = "green",
    midpoint = 0,
    limits = c(-0.5, 0.5)
  ) +
  scale_x_continuous(breaks = 1:2, sec.axis = dup_axis(),
                     expand = c(0,0),
                     limits = c(0.5, 2.5),
                     labels = c(paste("Not No Need\nN=", n_by_group[["0"]]),
                                paste("No Need\nN=", n_by_group[["1"]]))) + 
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.title.y = element_text(angle = 0)) +
  labs(
    x = "Motive Reason: No Need", y = "Feature",
    fill = "Means (std)"
  )

## Error: Problem with `mutate()` column `no_need`.
## ℹ `no_need = lack_motive_no_need`.
## x object 'lack_motive_no_need' not found

Vaccine Hesitancy User Segmentation

Updated: September 20, 2022