# Load the Chatfuel export (empty cells become NA).
# The user id is read as text: it is a long integer id, and letting read.csv()
# parse it as a number and converting back with as.character() can alter it.
chatfuel <- read.csv(
  here("pilot8", "data", "chatfuel", "Share_Your_Voice_2022_08_15_23_59_12_full.csv"),
  na.strings = "",
  colClasses = c(chatfuel.user.id = "character")
) %>%
  mutate(chatfuel.user.id = as.character(chatfuel.user.id)) %>%
  # this variable is not used
  dplyr::select(!opinion_friend) %>%
  # extract ad id from ref and keep it as text: the 17-digit ad ids are larger
  # than 2^53, so they cannot be represented exactly as doubles.
  # A ref that is not purely digits after removing the prefix becomes NA.
  mutate(
    ad_id = str_trim(str_remove(string = ref, pattern = "ad id: ")),
    ad_id = if_else(str_detect(ad_id, "^[0-9]+$"), ad_id, NA_character_)
  ) %>%
  dplyr::select(!ref) %>%
  # anything other than an explicit "yes" (including NA) counts as incomplete
  mutate(full_complete = ifelse(full_complete == "yes" & !is.na(full_complete), "complete", "incomplete")) %>%
  # unite variables that are the same
  unite("ability_misunderstood_explain", c("ability_misunderstood", "ability_misunderstood_explain"),
    na.rm = TRUE, remove = TRUE, sep = ""
  ) %>%
  unite("availability_main", c("availability_main", "availability_main."),
    na.rm = TRUE, remove = TRUE, sep = ""
  ) %>%
  unite("intro_complete", c("intro_complete", "consent_complete"),
    na.rm = TRUE, remove = TRUE, sep = ""
  ) %>%
  # when both source columns were "yes" the united value is "yesyes"
  mutate(intro_complete = ifelse(intro_complete == "yesyes", "yes", intro_complete))
Ads
ad_themes <- c("Risky", "Inaccessible", "Unnecessary", "Fear")
# Ad.ID is read as text for the same reason as ad_id above, so that the two
# columns can be compared digit for digit.
ads <- read.csv(
  here("pilot8", "data", "ads_data_v8.csv"),
  colClasses = c(Ad.ID = "character")
) %>%
  # extract ad theme and make it a column
  mutate(
    theme = str_extract(Ad.Set.Name, pattern = paste(ad_themes, collapse = "|")),
    Ad.ID = as.character(Ad.ID)
  )
# the first line is a summary
ads <- ads[-1, ]
# print the Chatfuel ad ids (NA included) that do not appear in the ads export
setdiff(chatfuel$ad_id, ads$Ad.ID)
## [1] NA "23850143823590220" "23850139217980156"
## [4] "23850143823490220" "23850143823480220"
# how many Chatfuel rows fall under each ad id that is missing from the ads export?
# (rows with an NA ad id are counted too, as their own group)
chatfuel %>%
filter(!(ad_id %in% unique(ads$Ad.ID))) %>%
group_by(ad_id) %>%
count()
## # A tibble: 5 × 2
## # Groups: ad_id [5]
## ad_id n
## <chr> <int>
## 1 23850139217980156 1
## 2 23850143823480220 1
## 3 23850143823490220 1
## 4 23850143823590220 1
## 5 <NA> 1197
# Drop rows whose ad id is present but unknown to the ads export
# (only 1 observation each). Rows without an ad id are kept.
chatfuel <- filter(chatfuel, is.na(ad_id) | ad_id %in% ads$Ad.ID)
# Lookup table: one row per Chatfuel row, carrying the ad columns for its ad id
ad_to_qualtrics_id <- merge(
  select(chatfuel, chatfuel.user.id, ad_id),
  ads,
  by.x = "ad_id", by.y = "Ad.ID", all.x = TRUE
)
# ad_id leaves the main table here; the lookup is merged back by chatfuel id later
chatfuel <- select(chatfuel, !ad_id)
Sign up time
# Create a sign-up time by chatfuel id table; it is merged back in later.
# NOTE: the object name `chatfeul_signup` is misspelled, but it is kept as is
# because the later merge refers to it by this name.
chatfeul_signup <- chatfuel %>% select(chatfuel.user.id, signed.up)
# signed.up leaves the main table once it has been stored in the lookup
chatfuel <- chatfuel %>%
select(!signed.up)
Gender
# Gender by chatfuel id table; merged back into the cleaned data later.
chatfuel_gender <- chatfuel %>%
select(chatfuel.user.id, gender)
# gender leaves the main table once it has been stored in the lookup
chatfuel <- chatfuel %>%
select(!gender)
# get paths to qualtrics data (files whose path contains "full")
fullpath <- str_subset(
  list.files(here("pilot8", "data", "qualtrics"), full.names = TRUE),
  pattern = "full"
)
# Read each export, drop its first two data rows (presumably Qualtrics' extra
# header rows - TODO confirm), label it with its treatment arm, and bind them all
# together.
qualtrics <- do.call("smartbind", lapply(fullpath, FUN = function(files) {
  read.csv(files, na.strings = "")[-c(1:2), ] %>%
    # create treatment assignment label from the first "T1"/"T5" in the path
    mutate(treatment_assign = str_extract(files, "T(1|5)"))
})) %>%
  # drop pii / unused columns
  dplyr::select(!c(
    IPAddress, RecipientLastName, RecipientFirstName, RecipientEmail,
    LocationLatitude, LocationLongitude, UserLanguage, version.,
    ExternalReference, ResponseId, DistributionChannel, Status, Progress,
    Duration..in.seconds., Finished, RecordedDate
  )) %>%
  # drop users with no chatfuel id or whose chatfuel id contains letters.
  # "[A-Za-z]" is used rather than "[A-z]": the latter range also covers the
  # punctuation that sits between "Z" and "a" ([ \ ] ^ _ `).
  filter(chatfuel_id != "" & !str_detect(chatfuel_id, "[A-Za-z]"))
Correct for Qualtrics shutdown issue
# Cut-off timestamp of the Qualtrics shutdown
A <- ymd_hms("2022-08-05 02:50:00")
qualtrics <- qualtrics %>%
  mutate(
    StartDate = ymd_hms(StartDate),
    # T5 responses started after the cut-off are flagged for dropping
    drop = treatment_assign == "T5" & StartDate > A,
    # recompute completion status: flagged rows become "drop", the rest are
    # complete/incomplete according to whether full_complete was recorded
    correction = case_when(
      drop ~ "drop",
      !drop & is.na(full_complete) ~ "incomplete",
      !drop & !is.na(full_complete) ~ "complete",
      TRUE ~ NA_character_
    )
  ) %>%
  # the corrected status replaces the raw full_complete column
  select(!full_complete) %>%
  rename(full_complete = correction)
# number of Qualtrics rows per treatment arm and corrected completion status
qualtrics %>%
select(treatment_assign, full_complete) %>%
group_by(treatment_assign, full_complete) %>%
count()
## # A tibble: 5 × 3
## # Groups: treatment_assign, full_complete [5]
## treatment_assign full_complete n
## <chr> <chr> <int>
## 1 T1 complete 1249
## 2 T1 incomplete 137
## 3 T5 complete 2734
## 4 T5 drop 648
## 5 T5 incomplete 1018
# number of Qualtrics observations per chatfuel id, most frequent first
qual_chatid_sum <- qualtrics %>%
dplyr::select(chatfuel_id) %>%
# empty ids were already filtered out when qualtrics was built; this repeats that check
filter(chatfuel_id != "") %>%
group_by(chatfuel_id) %>%
count() %>%
arrange(desc(n))
qual_chatid_sum
## # A tibble: 1,088 × 2
## # Groups: chatfuel_id [1,088]
## chatfuel_id n
## <chr> <int>
## 1 5098864440235852 3208
## 2 5357116787706479 303
## 3 7711755595561101 264
## 4 5103974433004691 158
## 5 6065549646806340 57
## 6 5130844860370768 33
## 7 7712433932163547 33
## 8 8024409180934271 30
## 9 4965765240190005 29
## 10 5207892332603804 29
## # … with 1,078 more rows
# how many chatfuel ids have more than 1 observation?
# one_obs = 1 for ids seen exactly once, 0 for ids seen more than once
qual_chatid_sum %>%
ungroup() %>%
mutate(one_obs = ifelse(n == 1, 1, 0)) %>%
group_by(one_obs) %>%
count()
## # A tibble: 2 × 2
## # Groups: one_obs [2]
## one_obs n
## <dbl> <int>
## 1 0 162
## 2 1 926
# Flag, for every chatfuel id, the response(s) with the earliest start date:
# that one is the randomized observation, all others are non randomized.
qualtrics <- qualtrics %>%
  arrange(StartDate) %>%
  group_by(chatfuel_id) %>%
  mutate(randomized = ifelse(StartDate == min(StartDate), 1, 0)) %>%
  ungroup() %>%
  # the timestamps are no longer needed once the flag exists
  dplyr::select(!StartDate & !EndDate)
# counts of randomized / non randomized rows by qualtrics treatment
# NOTE: percent is taken after ungroup(), so each cell is a share of ALL
# qualtrics rows, not a share within its treatment arm
qualtrics %>%
select(treatment_assign, randomized) %>%
group_by(treatment_assign, randomized) %>%
count() %>%
ungroup() %>%
mutate(percent = round(n/sum(n)*100, 2))
## # A tibble: 4 × 4
## treatment_assign randomized n percent
## <chr> <dbl> <int> <dbl>
## 1 T1 0 852 14.7
## 2 T1 1 534 9.23
## 3 T5 0 3846 66.5
## 4 T5 1 554 9.57
Fixing Qualtrics column names like ability consent, consent.1, consent.2, etc
# Columns that appear more than once carry a numeric suffix (name, name.1,
# name.2, ...). Take the base names from the columns ending in ".1".
# The pattern is anchored and the dot escaped, so only a literal trailing ".1"
# is recognised and removed.
colnames_to_fix <- unique(
  str_remove(grep("\\.1$", colnames(qualtrics), value = TRUE), "\\.1$")
)
# unite each base column with all of its suffixed copies into one column.
# Iterating over the values (not 1:length()) makes an empty list a no-op.
for (base_name in colnames_to_fix) {
  current_cols <- colnames(qualtrics)
  # the base column itself plus every "<base>.<something>" column; compared as
  # plain text so that dots in the base name are not treated as regex wildcards
  pieces <- current_cols[
    current_cols == base_name | startsWith(current_cols, paste0(base_name, "."))
  ]
  qualtrics <- qualtrics %>%
    unite(!!base_name, all_of(pieces), na.rm = TRUE, remove = TRUE, sep = "")
}
manually fix the column names with just a period at the end
# Column pairs that hold the same question under slightly different names
# (trailing period, missing underscore) are collapsed into one column each.
qualtrics <- qualtrics %>%
  unite("ability_mis_explain", c("ability_mis_explain", "ability_mis_explain."),
    na.rm = TRUE, remove = TRUE, sep = ""
  ) %>%
  unite("motive_mis_explain", c("motive_mis_explain", "motive_mis_explain."),
    na.rm = TRUE, remove = TRUE, sep = ""
  ) %>%
  unite("best_treatment_2", c("best_treatment_2", "best_treatment2"),
    na.rm = TRUE, remove = TRUE, sep = ""
  ) %>%
  unite("treat_no_explain", c("treat_no_explain", "treat_no_explain."),
    na.rm = TRUE, remove = TRUE, sep = ""
  )
rename qualtrics column names to match chatfuel column names using crosswalk
# Crosswalk from Qualtrics column names to Chatfuel column names; rows without
# a Chatfuel name are ignored.
xwalk <- read.csv(here("pilot8", "scripts", "qualtrics_to_chatfuel.csv")) %>%
  dplyr::select(qualtrics_names, chatfuel_col) %>%
  filter(chatfuel_col != "")
# Rename by looking each Qualtrics column up in the crosswalk. A positional
# assignment would only be right if the crosswalk rows happened to be in the
# same order as the columns of `qualtrics`; match() pairs them by name.
xwalk_row <- match(colnames(qualtrics), xwalk$qualtrics_names)
has_new_name <- !is.na(xwalk_row)
colnames(qualtrics)[has_new_name] <- as.character(xwalk$chatfuel_col[xwalk_row[has_new_name]])
make data frames for T1 and T5 that will merge with chatfuel
# get all chatfuel columns that are in qualtrics (direct matches in qualtrics)
chatfuel_matches_qualtrics <- dplyr::select(chatfuel, any_of(colnames(qualtrics)))
# df for T2 T3 T4
chatfuel_t2_t3_t4 <- filter(
  chatfuel_matches_qualtrics,
  treatment_assign %in% c("T2", "T3", "T4")
)
# t1 and t5 observations that are not found in qualtrics
# (did not actually start the qualtrics survey)
t1_t5_not_start_qualtrics <- filter(
  chatfuel_matches_qualtrics,
  treatment_assign %in% c("T1", "T5"),
  !(chatfuel.user.id %in% qualtrics$chatfuel.user.id)
)
# T1 and T5 that did start the survey
t1_t5_start_qualtrics <- filter(
  chatfuel_matches_qualtrics,
  treatment_assign %in% c("T1", "T5"),
  chatfuel.user.id %in% qualtrics$chatfuel.user.id
)
# merge qualtrics to chatfuel T1: only the id comes from chatfuel, every other
# column comes from the T1 qualtrics rows
chatfuel_t1 <- merge(
  dplyr::select(filter(t1_t5_start_qualtrics, treatment_assign == "T1"), "chatfuel.user.id"),
  qualtrics[qualtrics$treatment_assign == "T1", ],
  by = "chatfuel.user.id", all.x = TRUE
)
# merge qualtrics to chatfuel T5, same construction
chatfuel_t5 <- merge(
  dplyr::select(filter(t1_t5_start_qualtrics, treatment_assign == "T5"), "chatfuel.user.id"),
  qualtrics[qualtrics$treatment_assign == "T5", ],
  by = "chatfuel.user.id", all.x = TRUE
)
Final data frame binds these all together
# Stack the four pieces: T2-T4 from chatfuel, T1/T5 that never started the
# qualtrics survey, and the T1 and T5 rows taken from qualtrics.
df <- chatfuel_t2_t3_t4 %>%
smartbind(t1_t5_not_start_qualtrics) %>%
smartbind(chatfuel_t1) %>%
smartbind(chatfuel_t5) %>%
# corrects for T2-T4 not having a randomized column
# (every row with a missing randomized value is set to 1)
mutate(randomized = ifelse(is.na(randomized), 1, randomized))
## Warning in smartbind(., chatfuel_t1): Column class mismatch for 'continue_0'.
## Converting column to class 'character'.
Dealing with duplicate phone numbers
# clean phone numbers: remove every space and dash, not just the first one,
# so that differently formatted copies of the same number compare equal
df$phone_number <- str_replace_all(df$phone_number, "[ -]", "")
# get list of duplicated phone numbers (non-empty numbers seen more than once)
dup_phone_numbers <- df %>%
  filter(phone_number != "") %>%
  group_by(phone_number) %>%
  count() %>%
  filter(n > 1) %>%
  pull(phone_number)
# we want to keep these - the earliest observation for a phone number
# NOTE(review): rows tied on the minimum intro_start_time are all kept, and if
# any intro_start_time within a phone number is NA then min() is NA and the
# filter below removes every row for that number - confirm this is intended.
# NOTE(review): the result stays grouped by phone_number (there is no ungroup()).
df_original_phonenumbers <- df %>%
filter(phone_number %in% dup_phone_numbers) %>%
group_by(phone_number) %>%
mutate(original_phone_number = ifelse(intro_start_time == min(intro_start_time), 1, 0)) %>%
filter(original_phone_number == 1) %>%
select(!original_phone_number)
# how many observations will be removed?
# note that we will keep the first observations of duplicated phone numbers
# as the "non duplicated observations". We drop the additional observations.
# removed = all rows - (rows with a non-duplicated number + first rows kept).
# The parentheses matter: `a - b + c` would add the kept rows instead of
# subtracting them. The figure recorded beneath this chunk was produced by that
# earlier form and is too large by twice the number of kept first observations.
nrow(df) -
  (nrow(df %>% filter(!(phone_number %in% dup_phone_numbers))) +
    nrow(df_original_phonenumbers))
## [1] 834
# chatfuel ids of lab members: dylan, sarah, james, saurabh, kristine
lab_ids <- c("5036136209807958", "7720209621383182", "5059178407427161", "4655082084550853", "4255488774554006")
df <- df %>%
  # drop every observation whose phone number is duplicated ...
  filter(!(phone_number %in% dup_phone_numbers)) %>%
  # ... then bind back the first observation for each of those numbers
  rbind(df_original_phonenumbers) %>%
  # lab members' own test runs are not part of the sample
  filter(!(chatfuel.user.id %in% lab_ids))
Clean the variables used in the analysis
# Harmonise the answer codes of the main analysis variables. Chatfuel and
# Qualtrics record the same answer with different strings; both spellings are
# mapped to one label, NA stays NA, and anything else becomes "other".
df_clean <- df %>%
  mutate(
    # countries outside the listed set are set to NA
    country_answer = tolower(country_answer),
    country_answer = ifelse(
      country_answer %in% c("kenya", "ghana", "nigeria", "south africa", "other"),
      country_answer, NA
    ),
    consent = case_when(
      is.na(consent) ~ NA_character_,
      consent %in% c("yes", "Yes, let's start.") ~ "yes",
      consent %in% c("no", "No, let's stop here.") ~ "no",
      TRUE ~ "other"
    ),
    vax_status = case_when(
      is.na(vax_status) ~ NA_character_,
      vax_status %in% c("no", "No, I haven't.", "unvax") ~ "unvax",
      vax_status %in% c("vax", "yes", "Yes, I have.") ~ "vax",
      TRUE ~ "other"
    ),
    # lower-case motive answer without its first full stop
    motive = tolower(str_remove(motive_raw, "\\.")),
    motive_main = case_when(
      is.na(motive_main_raw) ~ NA_character_,
      motive_main_raw %in% "No clear benefit." ~ "benefit",
      motive_main_raw %in% c("It's too risky.", "Too risky.") ~ "risky",
      motive_main_raw %in% "Against my beliefs." ~ "beliefs",
      motive_main_raw %in% c("No,you misunderstood", "No, you misunderstood", "No, you misunderstood.") ~ "misunderstood",
      TRUE ~ "other"
    ),
    ability_raw = tolower(ability_raw),
    ability = ifelse(
      is.na(ability_raw) | ability_raw %in% c("easy", "somewhat hard", "really hard"),
      ability_raw, "other"
    ),
    ability_main = case_when(
      is.na(ability_main) ~ NA_character_,
      ability_main %in% c("availability", "No availability.") ~ "availability",
      ability_main %in% c("time", "No time.") ~ "time",
      ability_main %in% c("money", "Too costly.") ~ "money",
      ability_main %in% c("misunderstood", "No, you misunderstood.") ~ "misunderstood",
      TRUE ~ "other"
    )
  ) %>%
  # the raw versions are dropped once their cleaned counterparts exist
  select(!ability_raw & !motive_main_raw) %>%
  # empty strings in any column become NA
  mutate_all(list(~ na_if(., "")))
clean best treatment
# best_treatment holds the position of the chosen button; the label behind a
# position depends on which menu the respondent saw, which is determined by
# vax_status and motive. The three menus, in button order:
best_treatment_menus <- list(
  motive_only = c(
    "Job/school required it.", "Family/friend endorses it.",
    "More safety evidence.", "New trusted info.", "Rewards for vaccinating.",
    "Nothing.", "No, something else."
  ),
  motive_and_ability = c(
    "Job/school required it.", "Family/friend endorses it.",
    "More safety evidence.", "New trusted info.", "Rewards for vaccinating.",
    "Convenient to vaccinate.", "Time off work.", "Reminders.", "Appointment.",
    "Nothing.", "No, something else."
  ),
  ability_only = c(
    "Job/school required it.", "Convenient to vaccinate.", "Time off work.",
    "Reminders.", "Appointment.", "Nothing.", "No, something else."
  )
)
# label at position `choice` of `menu`; NA when the choice is not a position
# of that menu
pick_label <- function(menu, choice) {
  menu[match(choice, as.character(seq_along(menu)))]
}
df_clean <- df_clean %>%
  mutate(
    new_best_treatment = coalesce(
      case_when(
        vax_status == "unvax" & motive == "probably not" ~
          pick_label(best_treatment_menus$motive_only, best_treatment),
        vax_status == "unvax" & motive == "unsure" ~
          pick_label(best_treatment_menus$motive_and_ability, best_treatment),
        vax_status == "unvax" & motive == "probably" ~
          pick_label(best_treatment_menus$ability_only, best_treatment),
        vax_status == "vax" & motive %in% c("was unsure", "no") ~
          pick_label(best_treatment_menus$motive_and_ability, best_treatment),
        vax_status == "vax" & motive == "yes" ~
          pick_label(best_treatment_menus$ability_only, best_treatment)
      ),
      # no label found: a missing answer stays missing, any other answer
      # is "other"
      if_else(is.na(best_treatment), NA_character_, "other")
    )
  ) %>%
  # the labelled column takes over the best_treatment name
  select(!best_treatment) %>%
  rename(best_treatment = new_best_treatment)
# the helpers are only needed for this step
rm(best_treatment_menus, pick_label)
Clean Motive
| Vax | Unvax | Label |
|---|---|---|
| Yes | Probably | Yes |
| No | Probably Not | No |
| Was Unsure | Unsure | Unsure |
# Collapse the vaccinated and unvaccinated wordings of motive onto the common
# yes / no / unsure labels of the table above; NA stays NA, any other answer is
# "other".
df_clean <- df_clean %>%
  mutate(
    motive = case_when(
      is.na(motive) ~ NA_character_,
      motive %in% c("yes", "probably") ~ "yes",
      motive %in% c("no", "probably not") ~ "no",
      motive %in% c("unsure", "was unsure") ~ "unsure",
      TRUE ~ "other"
    )
  )
Add IDs for dylan outcomes
# phone numbers are personal data and are no longer needed
df_clean <- df_clean %>% dplyr::select(!phone_number)
# running row id; the hand-coded outcome files are merged on it.
# seq_len() also behaves for an empty data frame, where 1:nrow() gives c(1, 0).
df_clean$id <- seq_len(nrow(df_clean))
Free text nchar/nquestions outcome
# Names of the free-text columns, grouped by the condition under which the
# question is asked.

# asked of everyone
everyone <- c(
  "opinion_friend_family",
  "opinion_conversation",
  "best_treatment_proposal",
  "opinion_other_people_treatment",
  "info_source",
  "info_confidence_explain",
  "challenge_have_covid",
  "why_change_mind",
  "info_likelihood_why",
  "self_reflection_explain",
  "comfortable_not_.parts",
  "enjoyable_not_parts",
  "suggestions",
  "greating_answer"
)
# if motive != yes
### Check this - i am seeing inconsistencies
motive_free_text <- c("motive_reason", "motive_other")
# if risk_main or benefit_main or belief_main == no, none of these
motive_sub_other_free_text <- "motive_sub_other"
# if motive_main_raw == "No, you misunderstood."
motive_main_raw_free_text <- "motive_misunderstood_explain"
# ability_raw != "easy"
ability_free_text <- c("ability_reason", "ability_other")
# if availability_main or time_main or money_main == "No, something else."
ability_sub_other_free_text <- "ability_sub_other"
# ability_main == "misunderstood"
ability_main_raw_free_text <- "ability_misunderstood_explain"
# not sure
best_treatment_none_free_text <- "best_treatment_none_explain"
# if best treatment != button or = something else
best_treatment_other_free_text <- "best_treatment_other"
# if vax_status = unvax and ability == "easy"
unvax_easy_free_text <- "unvaxeasy_explain"
# Free-text outcome: characters typed per free-text question asked.
# For every group of free-text questions two helper columns are built:
#   <group>_nchar  characters typed in the group's answers (NA answers count 0)
#   <group>_nq     number of questions in the group
# Both are 0 when the respondent's path does not include the group, and NA when
# the condition itself is NA (treated as 0 in the totals through na.rm = TRUE).
df_clean <- df_clean %>%
  mutate(
    everyone_nchar = rowSums(across(all_of(everyone), nchar), na.rm = TRUE),
    everyone_nq = length(everyone),
    motive_nchar = ifelse(motive != "yes",
      rowSums(across(all_of(motive_free_text), nchar), na.rm = TRUE),
      0
    ),
    motive_nq = ifelse(motive != "yes",
      length(motive_free_text),
      0
    ),
    motive_sub_other_nchar = ifelse(benefit_main == "No, none of these." | belief_main == "No, none of these." | risk_main == "No, none of these.",
      rowSums(across(all_of(motive_sub_other_free_text), nchar), na.rm = TRUE), 0
    ),
    motive_sub_other_nq = ifelse(benefit_main == "No, none of these." | belief_main == "No, none of these." | risk_main == "No, none of these.",
      length(motive_sub_other_free_text), 0
    ),
    motive_misunderstood_explain_nchar = ifelse(motive_main == "misunderstood", rowSums(across(all_of(motive_main_raw_free_text), nchar), na.rm = TRUE), 0),
    motive_misunderstood_explain_nq = ifelse(motive_main == "misunderstood", length(motive_main_raw_free_text), 0),
    ability_nchar = ifelse(ability != "easy", rowSums(across(all_of(ability_free_text), nchar), na.rm = TRUE), 0),
    ability_nq = ifelse(ability != "easy", length(ability_free_text), 0),
    ability_sub_other_nchar = ifelse(time_main == "No, something else." | availability_main == "No, something else." | money_main == "No, something else.",
      rowSums(across(all_of(ability_sub_other_free_text), nchar), na.rm = TRUE), 0
    ),
    ability_sub_other_nq = ifelse(time_main == "No, something else." | availability_main == "No, something else." | money_main == "No, something else.",
      length(ability_sub_other_free_text), 0
    ),
    ability_misunderstood_explain_nchar = ifelse(ability_main == "misunderstood", rowSums(across(all_of(ability_main_raw_free_text), nchar), na.rm = TRUE), 0),
    ability_misunderstood_explain_nq = ifelse(ability_main == "misunderstood", length(ability_main_raw_free_text), 0),
    best_treatment_none_nchar = ifelse(best_treatment == "Nothing.", rowSums(across(all_of(best_treatment_none_free_text), nchar), na.rm = TRUE), 0),
    best_treatment_none_nq = ifelse(best_treatment == "Nothing.", length(best_treatment_none_free_text), 0),
    # "other" = anything that is not one of the labelled buttons (NA included)
    best_treatment_other_nchar = ifelse(
      !(best_treatment %in% c(
        "Convenient to vaccinate.", "Job/school required it.", "More safety evidence.",
        "Time off work.", "Family/friend endorses it.", "New trusted info.", "Reminders.", "Appointment."
      )),
      rowSums(across(all_of(best_treatment_other_free_text), nchar), na.rm = TRUE), 0
    ),
    best_treatment_other_nq = ifelse(
      !(best_treatment %in% c(
        "Convenient to vaccinate.", "Job/school required it.", "More safety evidence.",
        "Time off work.", "Family/friend endorses it.", "New trusted info.", "Reminders.", "Appointment."
      )),
      length(best_treatment_other_free_text), 0
    ),
    unvaxeasy_other_nchar = ifelse(vax_status == "unvax" & ability == "easy", rowSums(across(all_of(unvax_easy_free_text), nchar), na.rm = TRUE), 0),
    unvaxeasy_other_nq = ifelse(vax_status == "unvax" & ability == "easy", length(unvax_easy_free_text), 0)
  ) %>%
  # totals over all helper columns, then characters per question
  mutate(
    nchar_all = rowSums(across(all_of(colnames(.)[grepl(pattern = "nchar", x = colnames(.))])), na.rm = TRUE),
    nq_all = rowSums(across(all_of(colnames(.)[grepl(pattern = "nq", x = colnames(.))])), na.rm = TRUE),
    nchar_per_question = nchar_all / nq_all
  ) %>%
  # drop the helper columns. The two conditions must be combined into ONE
  # selection: passed as two comma-separated arguments they are unioned, which
  # keeps every column.
  select(!(ends_with("_nchar") | ends_with("_nq")))
Randomized qualtrics treatments by complete/incomplete
# T1/T5 rows by treatment arm, randomized flag and completion status
df_clean %>%
filter(treatment_assign %in% c("T1", "T5")) %>%
select(treatment_assign, randomized, full_complete) %>%
group_by(treatment_assign, randomized, full_complete) %>%
count()
## # A tibble: 10 × 4
## # Groups: treatment_assign, randomized, full_complete [10]
## treatment_assign randomized full_complete n
## <chr> <dbl> <chr> <int>
## 1 T1 0 complete 719
## 2 T1 0 incomplete 58
## 3 T1 1 complete 491
## 4 T1 1 incomplete 1390
## 5 T5 0 complete 2250
## 6 T5 0 drop 612
## 7 T5 0 incomplete 927
## 8 T5 1 complete 476
## 9 T5 1 drop 23
## 10 T5 1 incomplete 1434
Add dylan’s heuristic outcomes
# Hand-coded heuristic outcomes, keyed by id. In the "none explain" file the
# code sits in a column named outcome_heuristics_best_treatment_proposal, so
# it is renamed on the way in.
outcome_none <- here("pilot8", "data", "hand_coded", "outcome_heuristics_best_treatment_none_explain.csv") %>%
  read.csv(na.strings = "NA") %>%
  select(id, outcome_heuristics_best_treatment_none = outcome_heuristics_best_treatment_proposal)
outcome_proposal <- here("pilot8", "data", "hand_coded", "outcome_heuristics_best_treatment_proposal.csv") %>%
  read.csv(na.strings = "NA") %>%
  select(id, outcome_heuristics_best_treatment_proposal)
# left-merge both onto the cleaned data, keeping every row of df_clean
df_clean <- merge(
  merge(df_clean, outcome_none, by = "id", all.x = TRUE),
  outcome_proposal,
  by = "id", all.x = TRUE
)
Filter out participants who have gone through the script multiple times
# Flag respondents whose answers can only come from running the script more
# than once: answers on mutually exclusive branches are both filled in.
# Everything is computed column-wise; no rowwise() is needed, so the result
# carries no leftover row-wise grouping.
df_clean <- df_clean %>%
  mutate(
    # answered both the vaccinated-only and the unvaccinated-only question
    vax_q = !is.na(why_change_mind),
    unvax_q = !is.na(vax_future),
    both_q = vax_q & unvax_q,
    # more than one ability branch answered
    avail_q = !is.na(availability_main),
    time_q = !is.na(time_main),
    money_q = !is.na(money_main),
    misunderstood_ability_q = ability_main == "misunderstood" & !is.na(ability_main),
    mult_ability = (avail_q + time_q + money_q + misunderstood_ability_q) > 1,
    # more than one motive branch answered
    belief_q = !is.na(belief_main),
    risk_q = !is.na(risk_main),
    benefit_q = !is.na(benefit_main),
    misunderstood_q = (motive_main == "misunderstood" & !is.na(motive_main)),
    mult_motive = (belief_q + risk_q + benefit_q + misunderstood_q) > 1,
    # motive is "yes" but follow-up questions for other motives were answered
    mult_motive2 = motive == "yes" & !is.na(motive) &
      (!is.na(motive_reason) | !is.na(motive_other) | !is.na(motive_misunderstood_explain) |
        !is.na(belief_main) | !is.na(benefit_main) | !is.na(risk_main)),
    # ability is "easy" but follow-up questions for other abilities were answered
    mult_ability2 = ability == "easy" & !is.na(ability) &
      (!is.na(ability_reason) | !is.na(ability_other) | !is.na(ability_misunderstood_explain) |
        !is.na(time_main) | !is.na(money_main) | !is.na(availability_main)),
    # 1 if any of the flags is set, 0 otherwise
    overwriting = as.integer(mult_motive | mult_motive2 | mult_ability2 | mult_ability | both_q)
  )
# overwriting by treatment
# number of flagged (overwriting == 1) rows per treatment arm
df_clean %>%
select( overwriting, treatment_assign) %>%
filter(overwriting == 1) %>%
group_by(overwriting, treatment_assign) %>%
count()
## # A tibble: 5 × 3
## # Groups: overwriting, treatment_assign [5]
## overwriting treatment_assign n
## <int> <chr> <int>
## 1 1 T1 4
## 2 1 T2 121
## 3 1 T3 135
## 4 1 T4 119
## 5 1 T5 3
# remove the flagged rows, then the flag itself
# (the intermediate *_q / mult_* helper columns are not removed here)
df_clean <- df_clean %>%
filter(overwriting != 1) %>%
select(!overwriting)
Day time Cleaning
# Bring the sign-up time back in by chatfuel id. Non randomized rows get no
# sign-up time; the merged-in signed.up column is replaced by signed_up.
df_clean <- merge(df_clean, chatfeul_signup, by = "chatfuel.user.id", all.x = TRUE) %>%
  mutate(signed_up = ifelse(randomized == 0, NA, signed.up)) %>%
  select(!signed.up)
# row count before the date filter
nrow(df_clean)
## [1] 13572
# Derive calendar variables from the sign-up time and keep August sign-ups
# (rows without a sign-up time are kept as well).
df_clean <- df_clean %>%
mutate(main_start_time = ymd_hms(signed_up),
# NOTE(review): month/hour/wday are taken from the raw signed_up value rather
# than from the parsed main_start_time - confirm both parse the stored format
# the same way
month = month(signed_up, label = T),
hour = hour(signed_up),
wday = wday(signed_up, label = T),
# two 12-hour blocks: 08:00-19:59 and 20:00-07:59
time_group= ifelse(hour >= 8 & hour< 20, "8 am to 8 pm", "8 pm to 8 am" ),
# weekday plus block, NA when either part is missing
day_time_group = ifelse(is.na(wday)|is.na(time_group), NA, paste(wday, time_group, sep = " "))) %>%
filter(month == "Aug" | is.na(month))
# row count after the date filter
nrow(df_clean)
## [1] 13331
Clean comfortable and enjoyable
# remove the first full stop from the comfortable and enjoyable answers
df_clean <- df_clean %>%
  mutate(across(c(comfortable, enjoyable), ~ str_remove(string = .x, pattern = "\\.")))
Merge Ads
# Merge the ad information and gender back in by chatfuel id (left merges).
df_clean <- df_clean %>%
  merge(ad_to_qualtrics_id, by = "chatfuel.user.id", all.x = TRUE) %>%
  merge(chatfuel_gender, by = "chatfuel.user.id", all.x = TRUE) %>%
  # we don't know gender of non randomized folks not in chatfuel
  mutate(gender = ifelse(randomized == 0, NA, gender)) %>%
  # demographic answers are compared in lower case from here on
  mutate(across(c(ethnicity, education, location, religion, religiosity), tolower))
# clean up demographic variables
# Recode the demographic columns of `df` to their known answer sets.
# For gender, ethnicity, education, location, religion and religiosity every
# value outside the column's list of accepted answers becomes "other"; NA is
# left as NA. Any religion containing "christ" (case-insensitive) is first set
# to "christian". Returns the modified data frame.
clean_up_demog <- function(df) {
  # values not in `allowed` (NA always allowed) are replaced by "other"
  lump_other <- function(values, allowed) {
    values[!values %in% c(allowed, NA_character_)] <- "other"
    values
  }

  df$gender <- lump_other(df$gender, c("male", "female"))
  df$ethnicity <- lump_other(
    df$ethnicity,
    c("black or african", "white or caucasian", "prefer not to say", "asian or indian", "other")
  )
  df$education <- lump_other(
    df$education,
    c("< high school", "high school", "some college", "2-year degree", "4-year degree", "graduate degree", "prefer not to say")
  )
  df$location <- lump_other(df$location, c("urban", "suburban", "rural", "prefer not to say"))

  # must run before the religion whitelist so the variants survive it
  df$religion[grep("christ", tolower(df$religion))] <- "christian"
  df$religion <- lump_other(
    df$religion,
    c("christian", "african traditional", "islam", "hinduism", "judaism", "no religion", "prefer not to say")
  )
  df$religiosity <- lump_other(
    df$religiosity,
    c("somewhat religious", "not very religious", "very religious", "prefer not to say")
  )
  df
}
# apply the demographic recoding to the analysis data
df_clean <- clean_up_demog(df_clean)
Clean MCQ
# Clean MCQs
# Make sure values match the encoded responses, any leftovers marked as "other".
# Recode the multiple-choice columns of `df` to their encoded answer sets.
# Sub-question answers are lower-cased and lose their first full stop, a few
# synonymous wordings are merged, and every value outside a column's accepted
# answers becomes "other" (NA is left as NA). Returns the modified data frame.
clean_up_mcq <- function(df) {
  # lower-case and remove the first full stop
  strip_period <- function(x) sub("\\.", "", tolower(x))
  # TRUE for values that are neither in `allowed` nor NA
  not_allowed <- function(x, allowed) !(x %in% c(allowed, NA_character_))

  # ability
  df[not_allowed(df$ability_main, c("availability", "money", "other", "time", "misunderstood")), "ability_main"] <- "other"
  # the check and the assignment both target `ability`; assigning to
  # "ability_raw" would create a stray column and leave `ability` untouched
  df[not_allowed(df$ability, c("easy", "really hard", "somewhat hard")), "ability"] <- "other"

  df$availability_main <- strip_period(df$availability_main)
  df$availability_main <- ifelse(
    df$availability_main %in% c("it's too far away", "it was too far away"),
    "too far away", df$availability_main
  )
  df[not_allowed(df$availability_main, c("no vaccines left", "too far away", "no, something else")), "availability_main"] <- "other"

  df$money_main <- strip_period(df$money_main)
  df[not_allowed(df$money_main, c("no cash", "no insurance", "travel costs", "no, something else")), "money_main"] <- "other"

  df$time_main <- strip_period(df$time_main)
  df[not_allowed(df$time_main, c("no childcare", "hard to get off work", "no time to research", "no, something else")), "time_main"] <- "other"

  # motive
  df[not_allowed(df$motive, c("no", "yes", "unsure")), "motive"] <- "other"

  df$belief_main <- strip_period(df$belief_main)
  df[not_allowed(df$belief_main, c("not trust pharm/gov", "freedom to choose", "religious reasons", "no, none of these")), "belief_main"] <- "other"

  df$risk_main <- strip_period(df$risk_main)
  df[not_allowed(df$risk_main, c("bad side effects", "not trust pharm/gov", "not enough testing", "no, none of these")), "risk_main"] <- "other"

  df$benefit_main <- strip_period(df$benefit_main)
  df$benefit_main <- ifelse(df$benefit_main %in% c("i had covid already"), "had covid before", df$benefit_main)
  df[not_allowed(df$benefit_main, c("had covid before", "unlikely to get sick", "covid not dangerous", "vaccines don't work", "no, none of these")), "benefit_main"] <- "other"

  # vaccination status and intention
  df[not_allowed(df$vax_status, c("unvax", "vax")), "vax_status"] <- "other"
  df[not_allowed(df$vax_future, c("Sure", "No", "Maybe")), "vax_future"] <- "other"
  df
}
# apply the multiple-choice recoding to the analysis data
df_clean <- clean_up_mcq(df_clean)
Numeric Columns
# Add numeric versions of the categorical covariates of `df` and turn
# location and ethnicity into factors with a fixed level order.
# Answers without a numeric code (e.g. "other", "prefer not to say") and
# missing answers become NA in the numeric columns. Returns the extended data
# frame.
numeric_cols <- function(df) {
  df %>%
    mutate(
      # 1 = vaccinated, 0 = unvaccinated, NA otherwise
      vax_status_num = ifelse(
        vax_status %in% "vax", 1,
        ifelse(vax_status %in% "unvax", 0, NA)
      ),
      # only whole ages from 18 to 99 are accepted; everything else is NA
      age = as.numeric(ifelse(cv_age %in% as.character(18:99), cv_age, "")),
      gender_num = case_when(
        gender == "female" ~ 1,
        gender == "male" ~ 0
      ),
      # ordinal codes, lowest level of schooling first
      education_num = case_when(
        education == "< high school" ~ 1,
        education == "high school" ~ 2,
        education == "some college" ~ 3,
        education == "2-year degree" ~ 4,
        education == "4-year degree" ~ 5,
        education == "graduate degree" ~ 6
      ),
      religiosity_num = case_when(
        religiosity == "not very religious" ~ 1,
        religiosity == "somewhat religious" ~ 2,
        religiosity == "very religious" ~ 3
      ),
      # computed from the text column, before location becomes a factor
      location_num = case_when(
        location == "rural" ~ 1,
        location == "suburban" ~ 2,
        location == "urban" ~ 3
      ),
      location = factor(
        location,
        levels = c("urban", "suburban", "rural", "other", "prefer not to say")
      ),
      # indicator computed from the text column, before ethnicity becomes a factor
      black = ifelse(ethnicity == "black or african", 1, 0),
      ethnicity = factor(
        ethnicity,
        levels = c(
          "asian or indian", "black or african", "white or caucasian",
          "other", "prefer not to say"
        )
      )
    )
}
# add the numeric covariates to the analysis data
df_clean <- numeric_cols(df_clean)
Add in hand coded values
# Hand-coded consistency ratings, each keyed by id.
motive_consistency <- here("pilot8", "data", "hand_coded", "motive_reason_consistency.csv") %>%
  read.csv() %>%
  # these three columns are left out; everything else in the file is kept
  select(!c(Note, vax_status, motive_reason))
opinion_consistency <- here("pilot8", "data", "hand_coded", "opinion_friend_family_consistency.csv") %>%
  read.csv() %>%
  select(id, opinion_friend_family_consistency, opinion_friend_family_consistency_2)
motive_track_consistency <- here("pilot8", "data", "hand_coded", "motive_track_consistency.csv") %>%
  read.csv() %>%
  select(id, motive_track_consistency)
# left-merge the three tables onto the cleaned data, one after the other
df_clean <- Reduce(
  function(left, right) merge(left, right, by = "id", all.x = TRUE),
  list(motive_consistency, opinion_consistency, motive_track_consistency),
  df_clean
)
Dylan’s csv with shutdown
# export the cleaned data while the T5 rows marked "drop" are still included
write.csv(df_clean, here("pilot8", "data", "full_df_clean_with_shutdown.csv"), row.names = T)
Drop T5 shutdown observations
# how many will be dropped? (rows whose completion status is "drop")
nrow(df_clean %>% filter(full_complete == "drop"))
## [1] 635
# remove the rows marked "drop" and export the final data
# NOTE(review): filter() also removes rows where full_complete is NA, because
# the comparison is NA for them - confirm that is intended
df_clean <- df_clean %>%
filter(full_complete != "drop")
write.csv(df_clean, here("pilot8", "data", "full_df_clean.csv"), row.names = T)
# read the exported file back in; `test` is not used again in the visible code
test <- read.csv(here("pilot8", "data", "full_df_clean.csv"))