The present analysis is based on 2309 unvaccinated respondents completing our survey for pilot wave 8 (August 2022), and 144 features (both MCQ and free text). The related GitHub issue is here.
# Load packages
pacman::p_load(DT, estimatr, kableExtra, readr, reshape2, tidyverse, xtable, dataMaid, ggcorrplot, ggmap, rpart, rpart.plot, pollster, wordcloud, tm, RColorBrewer, hrbrthemes, janitor, purrr, gridExtra, cowplot, rcompanion, nnet, texreg, compareGroups, factoextra, cluster, fastDummies, simputation, sentimentr, politeness, textir, glmnet, gamlr, tm, topicmodels, ldatuning, lda, SnowballC, olsrr, here)
set.seed(94305)
# read in CURRENT chatfuel data
# full data
# Load the cleaned pilot-8 export, normalise column names, strip embedded
# newlines/tabs from every character column and harmonise a few answer labels.
df_full_v8 <-
  here("pilot8/data/full_df_clean.csv") %>%
  read_csv() %>%
  clean_names() %>%
  mutate(across(where(is.character), ~ str_replace_all(.x, '[\n\t]', ''))) %>%
  mutate(
    # Collapse any answer containing "yes" to "yes"; every other non-missing answer becomes "no".
    motive = if_else(str_detect(motive, "yes"), "yes", "no"),
    # Any answer mentioning "risk" is recoded to the single label "risk".
    motive_main = if_else(str_detect(motive_main, "risk"), "risk", motive_main),
    # Drop periods and use sentence case before relabelling.
    best_treatment = best_treatment %>% str_remove_all("\\.") %>% str_to_sentence(),
    best_treatment = if_else(
      best_treatment == "New trusted info",
      "More safety evidence",
      best_treatment
    )
  ) %>%
  remove_empty("rows")
# Keep only respondents who completed the survey and whose vaccination
# status is known.
df <- df_full_v8 %>%
  filter(full_complete == "complete", !is.na(vax_status))
# clean up demographic variables
# Build the modelling feature table from the completed responses in `df`.
# Steps: (1) append a sentiment score for the `opinion_conversation` free text,
# (2) derive indicator / numeric features inside one sequential mutate(),
# (3) keep only the feature columns and reorder them.
df_features <-
df %>%
# Sentiment of `opinion_conversation`: get_sentences() + sentiment_by(), keeping
# `ave_sentiment` under a new name. bind_cols() pairs rows by position, so this
# assumes one sentiment row per respondent, in the same order -- TODO confirm.
bind_cols(
df %>%
pull(opinion_conversation) %>%
get_sentences() %>%
sentiment_by() %>%
transmute(opinion_conv_sentiment = ave_sentiment)
) %>%
mutate(
# 0 when the answer contains "No" or "no" anywhere, 1 otherwise; missing -> 0.
covid_already = if_else(str_detect(covid_already, "No|no"), 0L, 1L) %>% replace_na(0),
# 1 when motive == "no", 0 when "yes"; any other value (incl. NA) stays NA
# because case_when() has no fallback and no replace_na() follows.
no_motive = case_when(
motive == "yes" ~ 0L,
motive == "no" ~ 1L,
),
# Copied as-is from `motive_nchar` (missing values are not replaced here).
motive_elaboration = motive_nchar,
# post_want_vax,
# 0 when ability == "easy", 1 for any other non-missing answer; missing -> 0.
no_ability = case_when(
ability == "easy" ~ 0L,
ability != "easy" & !is.na(ability) ~ 1L,
) %>% replace_na(0),
ability_elaboration = ability_nchar %>% replace_na(0),
# Main motive / ability impediment dummies: 1 on exact match, missing -> 0.
against_beliefs = if_else(motive_main == "beliefs", 1L, 0L) %>% replace_na(0),
no_benefits = if_else(motive_main == "benefit", 1L, 0L) %>% replace_na(0),
risky = if_else(motive_main == "risk", 1L, 0L) %>% replace_na(0),
no_time = if_else(ability_main == "time", 1L, 0L) %>% replace_na(0),
no_money = if_else(ability_main == "money", 1L, 0L) %>% replace_na(0),
no_availability = if_else(ability_main == "availability", 1L, 0L) %>% replace_na(0),
# risk: substring matches on `risk_main`
bad_side_effects = if_else(str_detect(risk_main, "bad side effects"), 1L, 0L) %>% replace_na(0),
lack_of_testing = if_else(str_detect(risk_main, "not enough testing"), 1L, 0L) %>% replace_na(0),
# Flagged when either `risk_main` or `belief_main` mentions distrust of pharma.
not_trust_pharma = if_else(str_detect(risk_main, "not trust phar") | str_detect(belief_main, "not trust phar"), 1L, 0L) %>% replace_na(0),
# benefit: substring matches on `benefit_main`
covid_not_dangerous = if_else(str_detect(benefit_main, "covid not dangerous"), 1L, 0L) %>% replace_na(0),
# had_covid_before = if_else(str_detect(benefit_main, "had covid before"), 1L, 0L) %>% replace_na(0),
vaccines_dont_work = if_else(str_detect(benefit_main, "vaccines don't work"), 1L, 0L) %>% replace_na(0),
# belief: substring matches on `belief_main`
freedom_to_choose = if_else(str_detect(belief_main, "freedom to choose"), 1L, 0L) %>% replace_na(0),
religious_reasons = if_else(str_detect(belief_main, "religious reasons"), 1L, 0L) %>% replace_na(0),
# time: substring matches on `time_main`
no_time_off_work = if_else(str_detect(time_main, "hard to get off work"), 1L, 0L) %>% replace_na(0),
no_time_to_research = if_else(str_detect(time_main, "no time to research"), 1L, 0L) %>% replace_na(0),
no_childcare = if_else(str_detect(time_main, "no childcare"), 1L, 0L) %>% replace_na(0),
# money: substring matches on `money_main`
no_cash = if_else(str_detect(money_main, "no cash"), 1L, 0L) %>% replace_na(0),
no_insurance = if_else(str_detect(money_main, "no insurance"), 1L, 0L) %>% replace_na(0),
travel_costs = if_else(str_detect(money_main, "travel costs"), 1L, 0L) %>% replace_na(0),
# availability: substring matches on `availability_main`
no_vax_left = if_else(str_detect(availability_main, "no vaccines left"), 1L, 0L) %>% replace_na(0),
too_far = if_else(str_detect(availability_main, "too far away"), 1L, 0L) %>% replace_na(0),
# `info_confidence` is lower-cased first; the next line reads the lower-cased value.
info_confidence = str_to_lower(info_confidence),
info_confidence_high = if_else(str_detect(info_confidence, "very"), 1L, 0L) %>% replace_na(0),
# The three columns below are overwritten in place with 0/1 indicators.
want_link = if_else(str_detect(want_link, "Sure"), 1L, 0L) %>% replace_na(0),
want_answer = if_else(str_detect(want_answer, "Sure"), 1L, 0L) %>% replace_na(0),
self_reflection = if_else(self_reflection == "A lot!", 1L, 0L) %>% replace_na(0),
# demographics
# Bare `age` keeps the existing column unchanged.
age,
education = education_num,
religiosity = religiosity_num,
location = location_num,
black_or_african = if_else(ethnicity == "black or african", 1L, 0L) %>% replace_na(0),
vaccinated = vax_status_num,
# Country dummies use double 1/0, unlike the integer dummies above.
nigeria = if_else(country_answer == "nigeria", 1, 0) %>% replace_na(0),
kenya = if_else(country_answer == "kenya", 1, 0) %>% replace_na(0),
ghana = if_else(country_answer == "ghana", 1, 0) %>% replace_na(0),
south_africa = if_else(country_answer == "south africa", 1, 0) %>% replace_na(0),
# Respondents without a sentiment score get 0.
opinion_conv_sentiment = opinion_conv_sentiment %>% replace_na(0)
) %>%
# Keep the prefixed indicator columns plus the derived features listed below.
select(starts_with(c("impediments_hr_", "info_source_hr_", "best_treat_", "opinion_hr_")),
covid_already, no_motive, motive_elaboration, no_ability,
ability_elaboration, against_beliefs, no_benefits, risky, no_time, no_money, no_availability,
bad_side_effects, lack_of_testing, not_trust_pharma,
covid_not_dangerous, vaccines_dont_work, freedom_to_choose,
religious_reasons, no_time_off_work, no_time_to_research,
no_childcare, no_cash, no_insurance, travel_costs, no_vax_left, too_far,
info_confidence, info_confidence_high, want_link, want_answer, self_reflection,
age, education, religiosity, location, black_or_african, vaccinated,
nigeria, kenya, ghana, south_africa,
best_treatment, opinion_conv_sentiment,post_want_vax) %>%
# The text column `info_confidence` is dropped again; only the 0/1 flag remains.
select(!info_confidence) %>%
# Move the info-source columns so they sit right before `info_confidence_high`.
relocate(starts_with("info_source"), .before = info_confidence_high)
# Feature table restricted to unvaccinated respondents, with every numeric
# column standardised (mean 0, sd 1).
df_features_unvax <-
  df_features %>%
  filter(vaccinated == 0) %>%
  ## scale
  mutate(across(
    where(is.numeric),
    ~ {
      spread <- sd(.x, na.rm = TRUE)
      # scale() divides by the standard deviation, so a constant column would
      # turn into NaN. `vaccinated` is constant (0) by construction after the
      # filter above, and rare dummies can be constant too. Such columns are
      # only centred.
      if (is.na(spread) || spread == 0) {
        as.vector(scale(.x, scale = FALSE))
      } else {
        as.vector(scale(.x))
      }
    }
  ))
# Unvaccinated respondents only (all original survey columns kept).
df_unvax <- filter(df, vax_status_num == 0)
# Convert a logical condition into a 0/1 integer flag; a missing condition
# counts as "not in the segment" (0).
seg_flag <- function(condition) {
  as.integer(dplyr::coalesce(condition, FALSE))
}

# Heuristic segment assignment for unvaccinated respondents.
# Every `*_seg` column is an integer 0/1 flag built from the `impediments_hr_*`,
# `opinion_hr_*` and `best_treat_*` indicator columns. A respondent can belong
# to several segments or to none. All flags share the same type and the same
# NA handling so that the column sums and row sums computed later are never NA.
segments <-
  df_unvax %>%
  mutate(
    heard_bad_things_seg = seg_flag(
      impediments_hr_heard_hearsay == 1 |
        (impediments_hr_more_information == 1 & impediments_hr_risk == 1) |
        (impediments_hr_unsafe == 1 & impediments_hr_risk == 1) |
        impediments_hr_scared == 1 |
        (impediments_hr_family_friends == 1 & impediments_hr_risk == 1) |
        impediments_hr_death == 1
    ),
    havent_gotten_vax_seg = seg_flag(
      impediments_hr_no_time == 1 |
        impediments_hr_distance == 1 |
        best_treat_easier_access_to_vax == 1 |
        best_treat_reminders == 1
    ),
    not_relevant_seg = seg_flag(
      impediments_hr_no_need == 1 |
        opinion_hr_no_need == 1 |
        impediments_hr_never_saw_covid == 1 |
        opinion_hr_never_saw_covid == 1
    ),
    covid_not_exis_seg = seg_flag(impediments_hr_covid_not_real == 1),
    misinformation_seg = seg_flag(impediments_hr_misinformation == 1),
    nothing_would_work_seg = seg_flag(
      impediments_hr_no_reason == 1 |
        best_treat_nothing == 1 |
        best_treat_dont_know == 1 |
        impediments_hr_never_saw_covid == 1 |
        impediments_hr_covid_not_real == 1
    ),
    side_effect_scare_seg = seg_flag(
      impediments_hr_side_effects == 1 | impediments_hr_pain == 1
    ),
    side_effect_maternity_seg = seg_flag(impediments_hr_pregnancy_nursing == 1),
    scared_of_needles_seg = seg_flag(impediments_hr_needles_injection == 1),
    death_concerns_seg = seg_flag(
      impediments_hr_death == 1 | opinion_hr_death == 1
    ),
    # Distrust of government / general distrust, excluding respondents who
    # cite religion or fear.
    against_freedom_choice_principles_seg = seg_flag(
      (impediments_hr_government == 1 | impediments_hr_trust == 1) &
        impediments_hr_religion == 0 &
        impediments_hr_scared == 0
    ),
    against_religious_beliefs_seg = seg_flag(impediments_hr_religion == 1),
    believe_body_is_healthy_seg = seg_flag(
      impediments_hr_healthy == 1 & impediments_hr_no_need == 1
    ),
    # All four indicators are compared with 1 explicitly, like everywhere else.
    super_busy_and_poor_seg = seg_flag(
      impediments_hr_financial == 1 |
        impediments_hr_no_time == 1 |
        impediments_hr_work == 1 |
        best_treat_rewards == 1
    ),
    need_time_off_work_to_get_vaxxed_seg = seg_flag(
      impediments_hr_work == 1 | impediments_hr_no_time == 1
    ),
    cant_get_off_work_to_get_vaxed_and_side_effects_seg = seg_flag(
      (impediments_hr_work == 1 | impediments_hr_no_time == 1) &
        impediments_hr_side_effects == 1
    ),
    too_far_away_from_vaccination_site_seg = seg_flag(
      impediments_hr_distance == 1
    )
  )
# One-row table holding the number of respondents in every segment.
n_by_group_tbl <-
  segments %>%
  dplyr::summarize(across(ends_with("_seg"), ~ sum(.x)))
First, the following segments were dropped due to having a size of less than 50.
### Drop segments with $n < 50$.
# Names of the segments with fewer than 50 members.
drop <- names(n_by_group_tbl)[n_by_group_tbl < 50]
# Remove exactly those columns. any_of() matches full column names, whereas
# contains() matches substrings and could also remove any other column whose
# name merely includes a dropped segment's name.
segments <- segments %>% select(!any_of(drop))
# Names of the segments that are kept.
list_of_segments <- segments %>%
  select(ends_with("_seg")) %>%
  colnames()
drop
## [1] "covid_not_exis_seg"
## [2] "side_effect_maternity_seg"
## [3] "against_religious_beliefs_seg"
## [4] "believe_body_is_healthy_seg"
## [5] "cant_get_off_work_to_get_vaxed_and_side_effects_seg"
The final segments, ordered by number of observations, are listed below:
# Keep only the counts of the retained segments.
n_by_group_tbl <- n_by_group_tbl[setdiff(names(n_by_group_tbl), drop)]
# Interactive table of segment sizes, largest first. The percentage is each
# segment's share of the summed segment sizes.
n_by_group_tbl %>%
  as.data.frame() %>%
  pivot_longer(
    cols = everything(),
    names_to = "segment",
    values_to = "number_of_observations"
  ) %>%
  arrange(desc(number_of_observations)) %>%
  mutate(
    # "heard_bad_things_seg" -> "Heard Bad Things"
    segment = segment %>%
      str_replace_all(pattern = "_seg|_", replacement = " ") %>%
      str_squish() %>%
      str_to_title(),
    `%_of_observations` = round(100 * number_of_observations / sum(number_of_observations), 2)
  ) %>%
  rename_with(~ str_to_title(str_replace_all(.x, "_", " "))) %>%
  datatable()
Below is the distribution of number of segments assigned to each unvaccinated user (N = 2309). Of these, the first bar represents 833 unvaccinated users who were not assigned a segment according to our heuristics.
# Bar chart of how many segments each unvaccinated respondent falls into.
# The bar for exactly one segment gets its own colour.
segments %>%
  transmute(n_segments_per_user = rowSums(across(contains("_seg")))) %>%
  tabyl(n_segments_per_user) %>%
  as_tibble() %>%
  mutate(flag = (n_segments_per_user == 1)) %>%
  ggplot(aes(n_segments_per_user, percent)) +
  geom_col(aes(fill = flag), alpha = 0.8, show.legend = FALSE) +
  scale_x_continuous(breaks = seq(0, 10, 1)) +
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 1),
    breaks = seq(0, 1, 0.05)
  ) +
  scale_fill_brewer(palette = "Set1") +
  labs(
    x = "Number of segments assigned",
    y = "Proportion of unvaccinated users",
    title = "Number of segments assigned per unvaccinated user (N = 2309)"
  ) +
  theme_minimal()

# Quick look at the impediment and motive columns.
df %>%
  select(contains(c("imped", "motiv"))) %>%
  glimpse()
# Cross-check: among respondents whose free-text motive mentions "side effect",
# how many carry the coded side-effects impediment?
df %>%
  select(id, impediments_hr_side_effects, motive_reason, motive_other) %>%
  mutate(across(where(is.character), str_to_lower)) %>%
  filter(str_detect(motive_reason, "side effect")) %>%
  tabyl(impediments_hr_side_effects)

# Same check against the derived side-effect segment.
segments %>%
  select(id, impediments_hr_side_effects, motive_reason, motive_other, contains("_seg")) %>%
  mutate(across(where(is.character), str_to_lower)) %>%
  filter(str_detect(motive_reason, "side effect")) %>%
  tabyl(side_effect_scare_seg)

# Free-text answers of the respondents who were assigned to no segment.
df_unseg <-
  segments %>%
  select(id, contains("_seg")) %>%
  # Sum over the segment columns by name rather than by hard-coded position
  # (`.[2:13]`), so the count stays correct when the number of retained
  # segments changes.
  mutate(n_segments_per_user = rowSums(across(contains("_seg")))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  # Column order matters: later chunks refer to these columns by position.
  select(motive_reason, motive_other, ability_reason, ability_other, opinion_friend_family, opinion_conversation, info_source)
We extract these 833 unsegmented unvaccinated users to a separate dataframe for further analysis.
Now we focus on these 833 unsegmented unvaccinated respondents by generating wordclouds and free text lists for specific questions to understand what they mention in their free text responses.
Questions considered:
Printing raw responses for
Can you share your main reason for not wanting the vaccine?:
# Frequency table of the raw `motive_reason` answers (first column of
# `df_unseg`), excluding missing, empty and "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(motive_reason)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text, sort = TRUE) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()
Printing raw responses for
Are there any other reasons you find it hard to get the vaccine?:
# Frequency table of the raw `motive_other` answers (second column of
# `df_unseg`), excluding missing, empty and "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(motive_other)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text, sort = TRUE) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()
Wordcloud for responses combined across the above 2 questions:
# Word cloud of the combined `motive_reason` + `motive_other` answers.
vector_wc <-
  df_unseg %>%
  unite("text", motive_reason, motive_other, na.rm = TRUE, remove = TRUE, sep = ". ") %>%
  mutate(
    # Strip URLs and apostrophes, then lower-case.
    text = gsub("http[^[:space:]]*", "", text),
    text = gsub("'", "", text) %>% str_to_lower(),
    # Remove uninformative words as WHOLE words only. Without the `\\b`
    # anchors the pattern also deletes substrings inside other words
    # ("know" -> "kw", "not" -> "t", "forget" -> "for") and leaves residue
    # such as the "s" of "vaccines", all of which ends up in the cloud.
    text = str_remove_all(text, "\\b(?:vaccin\\w*|covid\\w*|dont|get|nothing|yes|no)\\b")
  ) %>%
  pull(text)
# Create corpus
docs <- Corpus(VectorSource(vector_wc))
# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))
# Create doc-term matrix and per-word totals, most frequent first
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 1, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
Questions considered:
Printing raw responses for
What is the challenge that affects you the most?:
# Frequency table of the raw `ability_reason` answers (third column of
# `df_unseg`), excluding missing, empty and "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(ability_reason)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text, sort = TRUE) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()
Printing raw responses for
Are there any other reasons you find it hard to get the vaccine?:
# Frequency table of the raw `ability_other` answers (fourth column of
# `df_unseg`), excluding missing, empty and "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(ability_other)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text, sort = TRUE) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()
Wordcloud for responses combined across the above 2 questions:
# Word cloud of the combined `ability_reason` + `ability_other` answers.
vector_wc <-
  df_unseg %>%
  unite("text", ability_reason, ability_other, na.rm = TRUE, remove = TRUE, sep = ". ") %>%
  mutate(
    # Strip URLs and apostrophes, then lower-case.
    text = gsub("http[^[:space:]]*", "", text),
    text = gsub("'", "", text) %>% str_to_lower(),
    # Remove uninformative words as WHOLE words only. Without the `\\b`
    # anchors the pattern also deletes substrings inside other words
    # ("know" -> "kw", "not" -> "t", "forget" -> "for") and leaves residue
    # such as the "s" of "vaccines", all of which ends up in the cloud.
    text = str_remove_all(text, "\\b(?:vaccin\\w*|covid\\w*|dont|get|nothing|yes|no)\\b")
  ) %>%
  pull(text)
# Create corpus
docs <- Corpus(VectorSource(vector_wc))
# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))
# Create doc-term matrix and per-word totals, most frequent first
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 1, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
Questions considered:
Printing raw responses for
What are your friends and family saying about the covid vaccine these days?:
# Frequency table of the raw `opinion_friend_family` answers (fifth column of
# `df_unseg`), excluding missing, empty and "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(opinion_friend_family)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text, sort = TRUE) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()
Printing raw responses for
What do you think of these conversations?:
# Frequency table of the raw `opinion_conversation` answers (sixth column of
# `df_unseg`), excluding missing, empty and "U" answers.
df_unseg %>%
  transmute(text = str_to_sentence(opinion_conversation)) %>%
  filter(!is.na(text), text != "U", text != "") %>%
  count(text, sort = TRUE) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()
Wordcloud for responses combined across the above 2 questions:
# Word cloud of the combined `opinion_friend_family` + `opinion_conversation`
# answers.
vector_wc <-
  df_unseg %>%
  unite("text", opinion_friend_family, opinion_conversation, na.rm = TRUE, remove = TRUE, sep = ". ") %>%
  mutate(
    # Strip URLs and apostrophes, then lower-case.
    text = gsub("http[^[:space:]]*", "", text),
    text = gsub("'", "", text) %>% str_to_lower(),
    # Remove uninformative words as WHOLE words only. Without the `\\b`
    # anchors the pattern also deletes substrings inside other words
    # ("know" -> "kw", "not" -> "t", "forget" -> "for") and leaves residue
    # such as the "s" of "vaccines", all of which ends up in the cloud.
    text = str_remove_all(text, "\\b(?:vaccin\\w*|covid\\w*|dont|get|nothing|yes|no)\\b")
  ) %>%
  pull(text)
# Create corpus
docs <- Corpus(VectorSource(vector_wc))
# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))
# Create doc-term matrix and per-word totals, most frequent first
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 1, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
Question considered:
Printing raw responses for
Where have you gotten most of your info on the covid vaccine?:
# Frequency table of the raw `info_source` answers (seventh column of
# `df_unseg`), excluding missing, empty and "U" answers. "Social media" is
# hyphenated so it reads as a single term.
df_unseg %>%
  transmute(text = str_to_sentence(info_source)) %>%
  filter(!is.na(text)) %>%
  mutate(text = if_else(text == "Social media", "Social-media", text)) %>%
  filter(text != "U", text != "") %>%
  count(text, sort = TRUE) %>%
  mutate(percent = round(n / sum(n) * 100, 2)) %>%
  DT::datatable()
Wordcloud for responses to
Where have you gotten most of your info on the covid vaccine?:
# Word cloud of the `info_source` answers.
vector_wc <-
  df_unseg %>%
  unite("text", info_source, na.rm = TRUE, remove = TRUE, sep = ". ") %>%
  mutate(
    text = str_to_sentence(text),
    # Hyphenate so that "social media" survives tokenisation as one term.
    text = if_else(text == "Social media", "Social-media", text)
  ) %>%
  mutate(
    # Strip URLs and apostrophes, then lower-case.
    text = gsub("http[^[:space:]]*", "", text),
    text = gsub("'", "", text) %>% str_to_lower(),
    # Remove uninformative words as WHOLE words only. Without the `\\b`
    # anchors the pattern also deletes substrings inside other words
    # ("know" -> "kw", "not" -> "t", "forget" -> "for") and leaves residue
    # such as the "s" of "vaccines", all of which ends up in the cloud.
    text = str_remove_all(text, "\\b(?:vaccin\\w*|covid\\w*|dont|get|nothing|yes|no)\\b")
  ) %>%
  pull(text)
# Create corpus
docs <- Corpus(VectorSource(vector_wc))
# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))
# Create doc-term matrix and per-word totals, most frequent first
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 1, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
We run some additional analysis on these 833 unsegmented unvaccinated users.
Treatment-country crosstabs for all unsegmented users:
# Treatment arm by country for respondents who were assigned to no segment.
segments %>%
  select(id, contains("_seg")) %>%
  # Sum the segment columns by name instead of the hard-coded `.[2:13]`, which
  # is only right while exactly 12 segments are retained.
  mutate(n_segments_per_user = rowSums(across(contains("_seg")))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  tabyl(treatment_assign, country_answer) %>%
  kable(format = "pipe")
| treatment_assign | ghana | kenya | nigeria | south africa |
|---|---|---|---|---|
| T1 | 3 | 105 | 29 | 17 |
| T2 | 1 | 48 | 57 | 14 |
| T3 | 4 | 48 | 51 | 14 |
| T4 | 4 | 55 | 39 | 11 |
| T5 | 0 | 53 | 277 | 3 |
Treatment-country crosstab for the 45% of unsegmented users who have duplicated IDs:
# Chatfuel user ids that occur more than once among the unsegmented respondents.
duped_ids <-
  segments %>%
  select(id, contains("_seg")) %>%
  # Sum the segment columns by name instead of the hard-coded `.[2:13]`, which
  # is only right while exactly 12 segments are retained.
  mutate(n_segments_per_user = rowSums(across(contains("_seg")))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  get_dupes(chatfuel_user_id) %>%
  distinct(chatfuel_user_id) %>%
  pull(chatfuel_user_id)
# Treatment arm by country, restricted to unsegmented respondents whose
# Chatfuel user id is duplicated.
segments %>%
  select(id, contains("_seg")) %>%
  # Sum the segment columns by name instead of the hard-coded `.[2:13]`, which
  # is only right while exactly 12 segments are retained.
  mutate(n_segments_per_user = rowSums(across(contains("_seg")))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  filter(chatfuel_user_id %in% duped_ids) %>%
  tabyl(treatment_assign, country_answer) %>%
  kable(format = "pipe")
| treatment_assign | ghana | kenya | nigeria | south africa |
|---|---|---|---|---|
| T1 | 1 | 73 | 7 | 13 |
| T5 | 0 | 20 | 262 | 0 |
Let’s dive deeper and see who these 262 unsegmented users from Nigeria who were assigned to T5 are.
# Demographic profile (categorical variables) of the unsegmented respondents
# with duplicated ids who are from Nigeria and were assigned T5.
segments %>%
  select(id, contains("_seg")) %>%
  # Sum the segment columns by name instead of the hard-coded `.[2:13]`, which
  # is only right while exactly 12 segments are retained.
  mutate(n_segments_per_user = rowSums(across(contains("_seg")))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  filter(chatfuel_user_id %in% duped_ids) %>%
  filter(country_answer == "nigeria", treatment_assign == "T5") %>%
  select(gender, age = cv_age, ethnicity, education, religion, religiosity, location) %>%
  mutate(across(where(is.character), as_factor)) %>%
  # `age` is left out here; its distribution is plotted separately.
  select(contains(c("gender", "ethnicity", "education", "religion", "religiosity", "location"))) %>%
  clean_names(case = "title") %>%
  papeR::summarize_factor() %>%
  datatable(options = list(pageLength = 50, columnDefs = list(list(orderable = TRUE, targets = 0))))
Distribution of age for these 262 respondents:
# Age histogram for the unsegmented, duplicated-id respondents from Nigeria
# who were assigned T5. Ages outside 0-120 are treated as missing.
segments %>%
  select(id, contains("_seg")) %>%
  # Sum the segment columns by name instead of the hard-coded `.[2:13]`, which
  # is only right while exactly 12 segments are retained.
  mutate(n_segments_per_user = rowSums(across(contains("_seg")))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  filter(chatfuel_user_id %in% duped_ids) %>%
  filter(country_answer == "nigeria", treatment_assign == "T5") %>%
  transmute(
    age = parse_integer(cv_age),
    age = if_else(age < 0 | age > 120, NA_integer_, age)
  ) %>%
  ggplot(aes(age)) +
  # 30 bins is the geom_histogram() default; stating it silences the message.
  geom_histogram(alpha = 0.9, bins = 30) +
  theme_minimal() +
  labs(
    x = "Age",
    y = "Count",
    subtitle = "Respondent age distribution"
  )

# Free-text answers of the same respondents.
segments %>%
  select(id, contains("_seg")) %>%
  mutate(n_segments_per_user = rowSums(across(contains("_seg")))) %>%
  filter(n_segments_per_user == 0) %>%
  inner_join(df %>% distinct(id, .keep_all = TRUE), by = "id") %>%
  filter(chatfuel_user_id %in% duped_ids) %>%
  filter(country_answer == "nigeria", treatment_assign == "T5") %>%
  select(opinion_friend_family, opinion_conversation, motive_reason, motive_other, ability_reason, ability_other, info_source)