1 Overview

The present analysis is based on 2309 unvaccinated respondents completing our survey for pilot wave 8 (August 2022), and 144 features (both MCQ and free text). The related GitHub issue is here.


# Load packages
# pacman::p_load() attaches every listed package in one call.
# NOTE(review): `tm` appears twice in the list; the duplicate looks harmless
# but can be dropped. `papeR` is used further down via `papeR::` and is not
# attached here, so it must already be installed.
pacman::p_load(DT, estimatr, kableExtra, readr, reshape2, tidyverse, xtable, dataMaid, ggcorrplot, ggmap, rpart, rpart.plot, pollster, wordcloud, tm, RColorBrewer, hrbrthemes, janitor, purrr, gridExtra, cowplot, rcompanion, nnet, texreg, compareGroups, factoextra, cluster, fastDummies, simputation, sentimentr, politeness, textir, glmnet, gamlr, tm, topicmodels, ldatuning, lda, SnowballC, olsrr, here)

# Fix the RNG seed so that stochastic steps further down are reproducible.
set.seed(94305)

# Read in the CURRENT chatfuel data.

# Full data: snake_case the column names, strip embedded newlines/tabs from
# every character column, harmonise a few answer codings, and drop rows that
# are entirely empty.
df_full_v8 <-
  here("pilot8/data/full_df_clean.csv") %>%
  read_csv() %>%
  clean_names() %>%
  mutate(across(where(is.character), ~ str_replace_all(.x, "[\n\t]", ""))) %>%
  mutate(
    # Any answer containing "yes" counts as "yes"; everything else is "no".
    motive = if_else(str_detect(motive, "yes"), "yes", "no"),
    # Collapse every risk-related main motive into the single label "risk".
    motive_main = if_else(str_detect(motive_main, "risk"), "risk", motive_main)
  ) %>%
  mutate(
    # Remove full stops, then normalise to sentence case.
    best_treatment = str_to_sentence(str_remove_all(best_treatment, "\\.")),
    best_treatment = if_else(
      best_treatment == "New trusted info",
      "More safety evidence",
      best_treatment
    )
  ) %>%
  remove_empty("rows")

# Keep only completed surveys that have a recorded vaccination status.
df <-
  df_full_v8 %>%
  filter(full_complete == "complete", !is.na(vax_status))

2 Generating segments

# Build the respondent-level feature table: a sentiment score for the
# free-text opinion answer, 0/1 indicator flags derived from the motive /
# ability answers, and demographic variables.

# 1 when `x` matches the regex `pattern`, 0 otherwise; missing answers give 0.
flag_match <- function(x, pattern) {
  if_else(str_detect(x, pattern), 1L, 0L) %>% replace_na(0)
}

# 1 when `x` equals `value`, 0 otherwise; missing answers give 0.
flag_equal <- function(x, value) {
  if_else(x == value, 1L, 0L) %>% replace_na(0)
}

# Average sentiment of each respondent's `opinion_conversation` answer.
opinion_sentiment <-
  df %>%
  pull(opinion_conversation) %>%
  get_sentences() %>%
  sentiment_by() %>%
  transmute(opinion_conv_sentiment = ave_sentiment)

df_features <-
  df %>%
  bind_cols(opinion_sentiment) %>%
  mutate(
    # Coded 0 when the answer contains "No"/"no" (or is missing), 1 otherwise.
    covid_already = if_else(str_detect(covid_already, "No|no"), 0L, 1L) %>% replace_na(0),
    no_motive = case_when(
      motive == "yes" ~ 0L,
      motive == "no" ~ 1L
    ),
    motive_elaboration = motive_nchar,
    # post_want_vax,
    no_ability = case_when(
      ability == "easy" ~ 0L,
      ability != "easy" & !is.na(ability) ~ 1L
    ) %>% replace_na(0),
    ability_elaboration = ability_nchar %>% replace_na(0),

    # main motive
    against_beliefs = flag_equal(motive_main, "beliefs"),
    no_benefits = flag_equal(motive_main, "benefit"),
    risky = flag_equal(motive_main, "risk"),

    # main ability barrier
    no_time = flag_equal(ability_main, "time"),
    no_money = flag_equal(ability_main, "money"),
    no_availability = flag_equal(ability_main, "availability"),

    # risk
    bad_side_effects = flag_match(risk_main, "bad side effects"),
    lack_of_testing = flag_match(risk_main, "not enough testing"),
    # Distrust of pharma can be reported under either the risk or the belief question.
    not_trust_pharma = if_else(
      str_detect(risk_main, "not trust phar") | str_detect(belief_main, "not trust phar"),
      1L,
      0L
    ) %>% replace_na(0),
    # benefit
    covid_not_dangerous = flag_match(benefit_main, "covid not dangerous"),
    # had_covid_before = if_else(str_detect(benefit_main, "had covid before"), 1L, 0L) %>% replace_na(0),
    vaccines_dont_work = flag_match(benefit_main, "vaccines don't work"),
    # belief
    freedom_to_choose = flag_match(belief_main, "freedom to choose"),
    religious_reasons = flag_match(belief_main, "religious reasons"),

    # time
    no_time_off_work = flag_match(time_main, "hard to get off work"),
    no_time_to_research = flag_match(time_main, "no time to research"),
    no_childcare = flag_match(time_main, "no childcare"),
    # money
    no_cash = flag_match(money_main, "no cash"),
    no_insurance = flag_match(money_main, "no insurance"),
    travel_costs = flag_match(money_main, "travel costs"),
    # availability
    no_vax_left = flag_match(availability_main, "no vaccines left"),
    too_far = flag_match(availability_main, "too far away"),

    # information and engagement
    info_confidence = str_to_lower(info_confidence),
    info_confidence_high = flag_match(info_confidence, "very"),
    want_link = flag_match(want_link, "Sure"),
    want_answer = flag_match(want_answer, "Sure"),
    self_reflection = flag_equal(self_reflection, "A lot!"),

    # demographics
    education = education_num,
    religiosity = religiosity_num,
    location = location_num,
    black_or_african = flag_equal(ethnicity, "black or african"),
    vaccinated = vax_status_num,
    # Country dummies are stored as doubles (1 / 0), unlike the integer flags above.
    nigeria = if_else(country_answer == "nigeria", 1, 0) %>% replace_na(0),
    kenya = if_else(country_answer == "kenya", 1, 0) %>% replace_na(0),
    ghana = if_else(country_answer == "ghana", 1, 0) %>% replace_na(0),
    south_africa = if_else(country_answer == "south africa", 1, 0) %>% replace_na(0),
    opinion_conv_sentiment = opinion_conv_sentiment %>% replace_na(0)
  ) %>%
  # Keep the hand-coded indicator families plus the features built above.
  # The lower-cased `info_confidence` text is only a stepping stone for
  # `info_confidence_high`, so it is not carried over.
  select(
    starts_with(c("impediments_hr_", "info_source_hr_", "best_treat_", "opinion_hr_")),
    covid_already, no_motive, motive_elaboration, no_ability,
    ability_elaboration, against_beliefs, no_benefits, risky,
    no_time, no_money, no_availability,
    bad_side_effects, lack_of_testing, not_trust_pharma,
    covid_not_dangerous, vaccines_dont_work, freedom_to_choose,
    religious_reasons, no_time_off_work, no_time_to_research,
    no_childcare, no_cash, no_insurance, travel_costs, no_vax_left, too_far,
    info_confidence_high, want_link, want_answer, self_reflection,
    age, education, religiosity, location, black_or_african, vaccinated,
    nigeria, kenya, ghana, south_africa,
    best_treatment, opinion_conv_sentiment, post_want_vax
  ) %>%
  relocate(starts_with("info_source"), .before = info_confidence_high)

# Feature table restricted to unvaccinated respondents, with every numeric
# column standardised (centred and divided by its standard deviation).
# NOTE(review): `vaccinated` is constant (0) after the filter, so scale()
# returns NaN for it; any other zero-variance column behaves the same way.
df_features_unvax <-
  df_features %>%
  filter(vaccinated == 0) %>%
  mutate(across(where(is.numeric), ~ as.vector(scale(.x))))

# Raw (unscaled) survey rows for the same unvaccinated respondents.
df_unvax <- filter(df, vax_status_num == 0)

# Heuristic segment assignment for unvaccinated respondents.
#
# Each `*_seg` column is a 0/1 integer flag built from the hand-coded
# `impediments_hr_*`, `opinion_hr_*` and `best_treat_*` indicator columns.
# A respondent may fall into several segments, or into none.
#
# All flags go through as_flag() so that they share one type (integer) and one
# missing-value rule (an undetermined condition counts as "not in segment").
# Previously some flags were doubles, one was logical, and only some of them
# replaced NA with 0.

# Convert a logical condition to 0L/1L, treating NA as FALSE.
as_flag <- function(condition) {
  as.integer(coalesce(condition, FALSE))
}

segments <-
  df_unvax %>%
  mutate(
    # Heard negative things: hearsay, fear or death mentions, or a risk mention
    # combined with wanting more information / safety doubts / family and friends.
    heard_bad_things_seg = as_flag(
      impediments_hr_heard_hearsay == 1 |
        (impediments_hr_more_information == 1 & impediments_hr_risk == 1) |
        (impediments_hr_unsafe == 1 & impediments_hr_risk == 1) |
        impediments_hr_scared == 1 |
        (impediments_hr_family_friends == 1 & impediments_hr_risk == 1) |
        impediments_hr_death == 1
    ),
    # Has not got around to it: time or distance barriers, or asks for easier
    # access / reminders.
    havent_gotten_vax_seg = as_flag(
      impediments_hr_no_time == 1 |
        impediments_hr_distance == 1 |
        best_treat_easier_access_to_vax == 1 |
        best_treat_reminders == 1
    ),
    # Sees no need for the vaccine or has never seen covid.
    not_relevant_seg = as_flag(
      impediments_hr_no_need == 1 |
        opinion_hr_no_need == 1 |
        impediments_hr_never_saw_covid == 1 |
        opinion_hr_never_saw_covid == 1
    ),
    covid_not_exis_seg = as_flag(impediments_hr_covid_not_real == 1),
    misinformation_seg = as_flag(impediments_hr_misinformation == 1),
    nothing_would_work_seg = as_flag(
      impediments_hr_no_reason == 1 |
        best_treat_nothing == 1 |
        best_treat_dont_know == 1 |
        impediments_hr_never_saw_covid == 1 |
        impediments_hr_covid_not_real == 1
    ),
    side_effect_scare_seg = as_flag(
      impediments_hr_side_effects == 1 | impediments_hr_pain == 1
    ),
    side_effect_maternity_seg = as_flag(impediments_hr_pregnancy_nursing == 1),
    scared_of_needles_seg = as_flag(impediments_hr_needles_injection == 1),
    death_concerns_seg = as_flag(
      impediments_hr_death == 1 | opinion_hr_death == 1
    ),
    # Government / trust objections that are neither religious nor fear-driven.
    against_freedom_choice_principles_seg = as_flag(
      (impediments_hr_government == 1 | impediments_hr_trust == 1) &
        impediments_hr_religion == 0 &
        impediments_hr_scared == 0
    ),
    against_religious_beliefs_seg = as_flag(impediments_hr_religion == 1),
    believe_body_is_healthy_seg = as_flag(
      impediments_hr_healthy == 1 & impediments_hr_no_need == 1
    ),
    # The last three terms were previously tested for truthiness without
    # `== 1`; they are now compared explicitly like every other indicator.
    super_busy_and_poor_seg = as_flag(
      impediments_hr_financial == 1 |
        impediments_hr_no_time == 1 |
        impediments_hr_work == 1 |
        best_treat_rewards == 1
    ),
    need_time_off_work_to_get_vaxxed_seg = as_flag(
      impediments_hr_work == 1 | impediments_hr_no_time == 1
    ),
    cant_get_off_work_to_get_vaxed_and_side_effects_seg = as_flag(
      (impediments_hr_work == 1 | impediments_hr_no_time == 1) &
        impediments_hr_side_effects == 1
    ),
    too_far_away_from_vaccination_site_seg = as_flag(impediments_hr_distance == 1)
  )

# Number of respondents in each segment (one row, one column per `*_seg` flag).
# Missing flags are ignored so that a single NA cannot turn a whole segment
# count into NA and break the size filter applied to this table.
n_by_group_tbl <-
  segments %>%
  select(ends_with("_seg")) %>%
  dplyr::summarize(across(everything(), ~ sum(.x, na.rm = TRUE)))

First, the following segments were dropped because they contain fewer than 50 respondents.

### Drop segments with $n < 50$

# Minimum number of respondents a segment needs in order to be kept.
min_segment_size <- 50

# Names of the segments that are too small. which() discards NA comparisons,
# so a missing count can never put an NA name into `drop`.
drop <- names(n_by_group_tbl)[which(unlist(n_by_group_tbl[1, ]) < min_segment_size)]

# Remove exactly those columns. all_of() matches whole column names, whereas
# contains() would also remove any column that merely contains one of the
# dropped names as a substring.
segments <- segments %>% select(!all_of(drop))

# Names of the segments that remain.
list_of_segments <- segments %>%
  select(ends_with("_seg")) %>%
  colnames()

drop
## [1] "covid_not_exis_seg"                                 
## [2] "side_effect_maternity_seg"                          
## [3] "against_religious_beliefs_seg"                      
## [4] "believe_body_is_healthy_seg"                        
## [5] "cant_get_off_work_to_get_vaxed_and_side_effects_seg"

The final segments, ordered by number of observations, are listed below:

# Keep only the counts of the segments that survived the size filter.
kept_segments <- !(names(n_by_group_tbl) %in% drop)
n_by_group_tbl <- n_by_group_tbl[kept_segments]

# Interactive table: one row per segment, largest first, with readable labels.
# NOTE(review): the percentage is each segment's share of the summed segment
# counts, not of respondents; users in several segments are counted once per
# segment in the denominator. Confirm that this is the intended base.
n_by_group_tbl %>%
  as.data.frame() %>%
  pivot_longer(
    cols = everything(),
    names_to = "segment",
    values_to = "number_of_observations"
  ) %>%
  arrange(desc(number_of_observations)) %>%
  mutate(
    segment = segment %>%
      str_replace_all(pattern = "_seg|_", replacement = " ") %>%
      str_squish() %>%
      str_to_title(),
    `%_of_observations` = round(100 * number_of_observations / sum(number_of_observations), 2)
  ) %>%
  rename_with(~ str_to_title(str_replace_all(.x, "_", " "))) %>%
  datatable()

3 Unsegmented users

Below is the distribution of number of segments assigned to each unvaccinated user (N = 2309). Of these, the first bar represents 833 unvaccinated users who were not assigned a segment according to our heuristics.

# Distribution of the number of segments assigned to each unvaccinated user.
segments_per_user_dist <-
  segments %>%
  select(contains("_seg")) %>%
  mutate(n_segments_per_user = rowSums(across(everything()))) %>%
  tabyl(n_segments_per_user) %>%
  as_tibble() %>%
  # Flag the "exactly one segment" bar so that it gets its own fill colour.
  mutate(flag = n_segments_per_user == 1)

ggplot(segments_per_user_dist, aes(x = n_segments_per_user, y = percent)) +
  geom_col(aes(fill = flag), alpha = 0.8, show.legend = FALSE) +
  scale_x_continuous(breaks = seq(0, 10, 1)) +
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 1),
    breaks = seq(0, 1, 0.05)
  ) +
  scale_fill_brewer(palette = "Set1") +
  theme_minimal() +
  labs(
    title = "Number of segments assigned per unvaccinated user (N = 2309)",
    x = "Number of segments assigned",
    y = "Proportion of unvaccinated users"
  )

# Quick look at every impediment / motive column.
df %>%
  select(contains(c("imped", "motiv"))) %>%
  glimpse()

# Among respondents whose free-text motive mentions "side effect", how often
# is the hand-coded side-effects impediment set?
df %>%
  select(id, impediments_hr_side_effects, motive_reason, motive_other) %>%
  mutate(across(where(is.character), str_to_lower)) %>%
  filter(str_detect(motive_reason, "side effect")) %>%
  tabyl(impediments_hr_side_effects)

# Same respondents (unvaccinated only): how often does the side-effect-scare
# segment pick them up?
segments %>%
  select(id, impediments_hr_side_effects, motive_reason, motive_other, contains("_seg")) %>%
  mutate(across(where(is.character), str_to_lower)) %>%
  filter(str_detect(motive_reason, "side effect")) %>%
  tabyl(side_effect_scare_seg)
# Free-text answers of the unvaccinated users who were assigned to no segment.
# The segment count sums every selected column except `id`, instead of the
# hard-coded positions 2:13, which silently break whenever the number of
# retained segments changes.
df_unseg <-
  segments %>%
  select(id, contains("_seg")) %>%
  mutate(n_segments_per_user = rowSums(select(., -id))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  select(
    motive_reason, motive_other,
    ability_reason, ability_other,
    opinion_friend_family, opinion_conversation,
    info_source
  )

We extract these 833 unsegmented unvaccinated users to a separate dataframe for further analysis.

4 Analyzing free text

Now we focus on these 833 unsegmented unvaccinated respondents by generating wordclouds and free text lists for specific questions to understand what they mention in their free text responses.

4.1 Motivation free text

Questions considered:

  • Can you share your main reason for not wanting the vaccine?
  • Are there any other reasons you find it hard to get the vaccine?

Printing raw responses for Can you share your main reason for not wanting the vaccine?:

# Frequency table of the distinct answers in `motive_reason` (first column of
# df_unseg), ignoring missing, empty and placeholder "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(motive_reason)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text) %>%
  arrange(desc(n)) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()

Printing raw responses for Are there any other reasons you find it hard to get the vaccine?:

# Frequency table of the distinct answers in `motive_other` (second column of
# df_unseg), ignoring missing, empty and placeholder "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(motive_other)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text) %>%
  arrange(desc(n)) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()

Wordcloud for responses combined across the above 2 questions:

# Word cloud of the two motivation free-text answers combined.
#
# Generic survey words (vaccine, covid, yes/no, ...) are removed as WHOLE
# words. The previous unanchored pattern also deleted them inside other words
# ("know" -> "kw", "money" -> "mey", "forget" -> "for") and left a stray
# "s"/"d" behind "vaccines"/"vaccinated", which corrupted the cloud.
vector_wc <-
  df_unseg %>%
  unite("text", c(motive_reason, motive_other), na.rm = TRUE, remove = TRUE, sep = ". ") %>%
  mutate(
    # Drop URLs, lower-case, then remove apostrophes so "don't" becomes "dont".
    text = str_to_lower(gsub("http[^[:space:]]*", "", text)),
    text = gsub("'", "", text),
    text = str_remove_all(
      text,
      "\\b(?:vaccines?|vaccinated?|vaccination|covid\\d*|dont|get|nothing|yes|no)\\b"
    )
  ) %>%
  pull(text)

# Create corpus
docs <- Corpus(VectorSource(vector_wc))

# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Create term-document matrix and total frequency per term
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

4.2 Ability free text

Questions considered:

  • What is the challenge that affects you the most?
  • Are there any other reasons you find it hard to get the vaccine?

Printing raw responses for What is the challenge that affects you the most?:

# Frequency table of the distinct answers in `ability_reason` (third column of
# df_unseg), ignoring missing, empty and placeholder "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(ability_reason)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text) %>%
  arrange(desc(n)) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()

Printing raw responses for Are there any other reasons you find it hard to get the vaccine?:

# Frequency table of the distinct answers in `ability_other` (fourth column of
# df_unseg), ignoring missing, empty and placeholder "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(ability_other)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text) %>%
  arrange(desc(n)) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()

Wordcloud for responses combined across the above 2 questions:

# Word cloud of the two ability free-text answers combined.
#
# Generic survey words are removed as WHOLE words only. The previous
# unanchored pattern also stripped them out of longer words (e.g. "money"
# became "mey" and "enough" became "eugh"), distorting exactly the terms this
# section is about.
vector_wc <-
  df_unseg %>%
  unite("text", c(ability_reason, ability_other), na.rm = TRUE, remove = TRUE, sep = ". ") %>%
  mutate(
    # Drop URLs, lower-case, then remove apostrophes so "don't" becomes "dont".
    text = str_to_lower(gsub("http[^[:space:]]*", "", text)),
    text = gsub("'", "", text),
    text = str_remove_all(
      text,
      "\\b(?:vaccines?|vaccinated?|vaccination|covid\\d*|dont|get|nothing|yes|no)\\b"
    )
  ) %>%
  pull(text)

# Create corpus
docs <- Corpus(VectorSource(vector_wc))

# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Create term-document matrix and total frequency per term
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

4.3 Opinion free text

Questions considered:

  • What are your friends and family saying about the covid vaccine these days?
  • What do you think of these conversations?

Printing raw responses for What are your friends and family saying about the covid vaccine these days?:

# Frequency table of the distinct answers in `opinion_friend_family` (fifth
# column of df_unseg), ignoring missing, empty and placeholder "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(opinion_friend_family)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text) %>%
  arrange(desc(n)) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()

Printing raw responses for What do you think of these conversations?:

# Frequency table of the distinct answers in `opinion_conversation` (sixth
# column of df_unseg), ignoring missing, empty and placeholder "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(opinion_conversation)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text) %>%
  arrange(desc(n)) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()

Wordcloud for responses combined across the above 2 questions:

# Word cloud of the two opinion free-text answers combined.
#
# Generic survey words are removed as WHOLE words only. The previous
# unanchored pattern also stripped them out of longer words (e.g. "know"
# became "kw" and "not" became "t").
vector_wc <-
  df_unseg %>%
  unite(
    "text",
    c(opinion_friend_family, opinion_conversation),
    na.rm = TRUE,
    remove = TRUE,
    sep = ". "
  ) %>%
  mutate(
    # Drop URLs, lower-case, then remove apostrophes so "don't" becomes "dont".
    text = str_to_lower(gsub("http[^[:space:]]*", "", text)),
    text = gsub("'", "", text),
    text = str_remove_all(
      text,
      "\\b(?:vaccines?|vaccinated?|vaccination|covid\\d*|dont|get|nothing|yes|no)\\b"
    )
  ) %>%
  pull(text)

# Create corpus
docs <- Corpus(VectorSource(vector_wc))

# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Create term-document matrix and total frequency per term
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

4.4 Information source

Question considered:

  • Where have you gotten most of your info on the covid vaccine?

Printing raw responses for Where have you gotten most of your info on the covid vaccine?:

# Frequency table of the distinct answers in `info_source` (seventh column of
# df_unseg), ignoring missing, empty and placeholder "U" answers.
# "Social media" is hyphenated so that it reads as a single term.
df_unseg %>%
  transmute(text = str_to_sentence(info_source)) %>%
  mutate(text = if_else(text == "Social media", "Social-media", text)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text) %>%
  arrange(desc(n)) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()

Wordcloud for responses to Where have you gotten most of your info on the covid vaccine?:

# Word cloud of the information-source answers.
#
# Generic survey words are removed as WHOLE words only. The previous
# unanchored pattern also stripped them out of longer words (e.g. "news"
# became "ws" and "phone" became "phe"), which hid real information sources.
vector_wc <-
  df_unseg %>%
  # unite() on a single column renames it to `text` and turns NA into "".
  unite("text", info_source, na.rm = TRUE, remove = TRUE, sep = ". ") %>%
  mutate(
    text = str_to_sentence(text),
    # Hyphenate so that "social media" ends up as one token after cleaning.
    text = if_else(text == "Social media", "Social-media", text)
  ) %>%
  mutate(
    # Drop URLs, lower-case, then remove apostrophes so "don't" becomes "dont".
    text = str_to_lower(gsub("http[^[:space:]]*", "", text)),
    text = gsub("'", "", text),
    text = str_remove_all(
      text,
      "\\b(?:vaccines?|vaccinated?|vaccination|covid\\d*|dont|get|nothing|yes|no)\\b"
    )
  ) %>%
  pull(text)

# Create corpus
docs <- Corpus(VectorSource(vector_wc))

# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Create term-document matrix and total frequency per term
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

5 Additional analysis

We run some additional analysis on these 833 unsegmented unvaccinated users.

5.1 Treatment-country crosstabs

Treatment-country crosstabs for all unsegmented users:

# Treatment x country counts for users who were assigned to no segment.
# The segment count sums every selected column except `id` rather than the
# hard-coded positions 2:13, so it stays correct if the set of retained
# segments changes.
segments %>%
  select(id, contains("_seg")) %>%
  mutate(n_segments_per_user = rowSums(select(., -id))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  tabyl(treatment_assign, country_answer) %>%
  kable(format = "pipe")
treatment_assign ghana kenya nigeria south africa
T1 3 105 29 17
T2 1 48 57 14
T3 4 48 51 14
T4 4 55 39 11
T5 0 53 277 3

Treatment-country crosstab for the 45% of unsegmented users who have duplicated IDs:

# Unsegmented users joined back to their full survey rows. Built once and
# reused below instead of repeating the whole pipeline; the segment count sums
# every selected column except `id` rather than the hard-coded positions 2:13.
unsegmented_users <-
  segments %>%
  select(id, contains("_seg")) %>%
  mutate(n_segments_per_user = rowSums(select(., -id))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id")

# Chatfuel user ids that occur more than once among the unsegmented users.
duped_ids <-
  unsegmented_users %>%
  get_dupes(chatfuel_user_id) %>%
  distinct(chatfuel_user_id) %>%
  pull(chatfuel_user_id)

# Treatment x country counts restricted to those duplicated ids.
unsegmented_users %>%
  filter(chatfuel_user_id %in% duped_ids) %>%
  tabyl(treatment_assign, country_answer) %>%
  kable(format = "pipe")
treatment_assign ghana kenya nigeria south africa
T1 1 73 7 13
T5 0 20 262 0



5.2 262 Nigerian T5 respondents

Let’s take a closer look at who these 262 unsegmented users from Nigeria assigned to T5 are.

5.2.1 Demographic information

# Demographic summary (categorical variables only) of the unsegmented,
# duplicated-id respondents from Nigeria who were assigned T5.
# The segment count sums every selected column except `id` rather than the
# hard-coded positions 2:13.
segments %>%
  select(id, contains("_seg")) %>%
  mutate(n_segments_per_user = rowSums(select(., -id))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  filter(chatfuel_user_id %in% duped_ids) %>%
  filter(country_answer == "nigeria", treatment_assign == "T5") %>%
  select(gender, age = cv_age, ethnicity, education, religion, religiosity, location) %>%
  mutate(across(where(is.character), as_factor)) %>%
  # Age is dropped here; only the categorical variables are summarised.
  select(contains(c("gender", "ethnicity", "education", "religion", "religiosity", "location"))) %>%
  clean_names(case = "title") %>%
  papeR::summarize_factor() %>%
  datatable(
    options = list(
      pageLength = 50,
      columnDefs = list(list(orderable = TRUE, targets = 0))
    )
  )

Distribution of age for these 262 respondents:

# Age histogram for the unsegmented, duplicated-id respondents from Nigeria
# who were assigned T5.
# The segment count sums every selected column except `id` rather than the
# hard-coded positions 2:13.
segments %>%
  select(id, contains("_seg")) %>%
  mutate(n_segments_per_user = rowSums(select(., -id))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  filter(chatfuel_user_id %in% duped_ids) %>%
  filter(country_answer == "nigeria", treatment_assign == "T5") %>%
  transmute(
    age = parse_integer(cv_age),
    # Treat implausible ages as missing.
    age = if_else(age < 0 | age > 120, NA_integer_, age)
  ) %>%
  ggplot(aes(age)) +
  geom_histogram(alpha = 0.9) +
  theme_minimal() +
  labs(
    x = "Age",
    y = "Count",
    subtitle = "Respondent age distribution"
  )



5.2.2 Free text answers

# Free-text answers of the unsegmented, duplicated-id respondents from Nigeria
# who were assigned T5.
# The segment count sums every selected column except `id` rather than the
# hard-coded positions 2:13.
segments %>%
  select(id, contains("_seg")) %>%
  mutate(n_segments_per_user = rowSums(select(., -id))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  filter(chatfuel_user_id %in% duped_ids) %>%
  filter(country_answer == "nigeria", treatment_assign == "T5") %>%
  select(
    opinion_friend_family, opinion_conversation,
    motive_reason, motive_other,
    ability_reason, ability_other,
    info_source
  )