rm(list = ls())
library(openxlsx)
pacman::p_load(DT, estimatr, kableExtra, readr, reshape2, tidyverse, xtable, dataMaid, ggcorrplot, ggmap, rpart, rpart.plot, pollster, wordcloud, tm, topicmodels, ldatuning, lda,SnowballC, pals, flextable, RColorBrewer, hrbrthemes, janitor, purrr, gridExtra, cowplot, rcompanion, nnet, texreg, compareGroups, factoextra, cluster, fastDummies, simputation, sentimentr, politeness, textir, xtable, plotrix, ggplot2)

# Fixed seed for the random number generator.
set.seed(94305)

# Create the output folders used by this script; showWarnings = FALSE keeps
# dir.create() quiet when a folder already exists.
for (out_dir in c('tables', 'figures', 'freetext')) {
  dir.create(file.path(out_dir), showWarnings = FALSE)
}

Data

library(dplyr)

# List the raw survey exports. Only the follow-up wave is loaded here; the
# PRE/POST loading code is kept below, commented out.
files <- list.files('./data', full.names = TRUE)
# pre_files <- files[grepl("Main PRE", files)]
# post_files <- files[grepl("Main POST", files)]
# INPUT_FILENAME_PRE <- pre_files[which.max(file.info(pre_files)$mtime)]
# INPUT_FILENAME_POST <- post_files[which.max(file.info(post_files)$mtime)]
# df_full_pre <- read_csv(INPUT_FILENAME_PRE) %>% clean_names %>% remove_empty()
# df_full_post <- read_csv(INPUT_FILENAME_POST) %>% clean_names %>% remove_empty()

followup_files <- files[grepl("Followup", files)]
# Fail early with a readable message: with no match, which.max() returns
# integer(0) and read_csv() would be handed an empty path.
if (length(followup_files) == 0) {
  stop("No file with 'Followup' in its name was found under ./data", call. = FALSE)
}
# When several exports match, use the most recently modified one.
INPUT_FILENAME_FOLLOWUP <- followup_files[which.max(file.info(followup_files)$mtime)]
df_full_followup <- read_csv(INPUT_FILENAME_FOLLOWUP) %>%
  clean_names() %>%
  remove_empty()

# df_final <- readRDS("../main_analysis/data/df_final.rds")


# Keep rows that are finished, consented to the course, and have status
# "IP Address".
df <- df_full_followup %>%
  filter(
    finished == "True",
    consent_course == "Yes, I agree",
    status == "IP Address"
  )

# Attach the contact-list fields. A "+" is prepended to the contact list's
# reference before joining (presumably to match the format used in the survey
# export -- confirm).
contact_list <- read.csv("../contact_lists/main/misinfo_followup.csv") %>%
  rename(external_reference = ExternalDataReference) %>%
  mutate(external_reference = paste0("+", as.character(external_reference)))
df <- left_join(df, contact_list, by = "external_reference")

# df_post_valid_phone <- unique(df_final$phone_num)
# df_post <- df_full_post[df_full_post$phone_num %in% df_post_valid_phone, ]
# clean_phone_number <- function(phone){
#   if (is.na(phone)){
#     return ("")
#   }
#   if (((substr(phone, 1, 1) == "0") | (substr(phone, 1, 1) == "O") | (substr(phone, 1, 1) == "+")) & nchar(phone) == 10){
#     phone <- substr(phone, 2, nchar(phone))
#   } else if (((substr(phone, 1, 3) == "254")) & nchar(phone) == 12){
#     phone <- substr(phone, 4, nchar(phone))
#   } else if (((substr(phone, 1, 4) == "2540")) & nchar(phone) == 13){
#     phone <- substr(phone, 5, nchar(phone))
#   } else if (((substr(phone, 1, 4) == "+254")) & nchar(phone) == 13){
#     phone <- substr(phone, 5, nchar(phone))
#   } else if (((substr(phone, 1, 4) == "±254")) & nchar(phone) == 13){
#     phone <- substr(phone, 5, nchar(phone))
#   } else if (((substr(phone, 1, 4) == "†254")) & nchar(phone) == 13){
#     phone <- substr(phone, 5, nchar(phone))
#   } else if (((substr(phone, 1, 5) == "+2540")) & nchar(phone) == 14){
#     phone <- substr(phone, 6, nchar(phone))
#   } else {
#     phone <- ""
#   }
#   return (phone)
#   
#   
# }
# df_post_phone <- sapply(df_post_valid_phone, function(x) clean_phone_number(x))
# Free-text columns: every column of `df` whose name begins with "reflective".
free_text_columns <- grep("^reflective", colnames(df), value = TRUE)

# Wording of the five reflective questions; element i is used as the header for
# question i when answers are exported.
reflective_questions <- c(
  "What are some techniques that people use to create misleading social media posts?",
  "When browsing your timeline in the last month, did you notice any post that looked misleading? If so, what made it seem misleading?",
  "How did you feel when you saw the misleading post? If you haven't seen any misleading posts recently, how do you think you would feel?",
  "Has the Inoculation against Misinformation course changed your behavior on social media? If so, how?",
  "If you were to tell a friend what you learned in the course, what tip would you share?"
)


# for (i in 1:5){
#   output <- df[free_text_columns[i]]
#   colnames(output) <- reflective_questions[i]
#   write.csv(output, paste0("./freetext/reflective_questions_", i, ".csv"))
# }

Generate Descriptive Statistics

Create variables of interest

options(scipen=999)
treatments <- c("control_delayed", "control_alternative", "tactics", "emotion", "combo", "overall")

# Format one statistic as two table cells: the mean (4 decimals) followed by the
# standard error in parentheses (2 decimals). Missing values are ignored.
response_stat_cells <- function(values) {
  c(
    round(mean(values, na.rm = TRUE), 4),
    paste0("(", round(std.error(values, na.rm = TRUE), 2), ")")
  )
}

# Summarise a vector of free-text answers as the 18 cells of one table column:
# length in characters and words, then the share of answers in each yes/no
# category. An answer counts as "no" only when it does not also contain "yes";
# "only" means at most 5 characters long, "more than" means longer than that.
describe_responses <- function(answers) {
  # Each regex is evaluated once and reused for every category.
  has_yes <- grepl("\\byes\\b", answers, ignore.case = TRUE)
  has_no_word <- grepl("\\bno\\b", answers, ignore.case = TRUE)
  has_no <- has_no_word & !has_yes
  n_chars <- nchar(answers)

  c(
    response_stat_cells(n_chars),
    response_stat_cells(str_count(answers, "\\w+")),
    response_stat_cells(has_yes),
    response_stat_cells(has_no),
    response_stat_cells(has_yes & n_chars <= 5),
    response_stat_cells(has_no & n_chars <= 5),
    response_stat_cells(has_yes & n_chars > 5),
    response_stat_cells(has_no & n_chars > 5),
    response_stat_cells(!(has_no_word | has_yes))
  )
}

# One LaTeX table per free-text question: rows are statistics, columns are the
# treatment arms plus an "Overall" column computed on all respondents.
for (ftc in free_text_columns) {
  # Takes the 12th character of the column name as the question number
  # (e.g. the "4" in "reflective_4").
  question_num <- substr(ftc, 12, 12)
  output_name <- paste0("Reflective Question ", question_num)

  total_output <- vapply(
    treatments,
    function(arm) {
      answers <- df[[ftc]]
      if (arm != "overall") {
        # Rows with a missing treatment are excluded, as filter() would do.
        answers <- answers[!is.na(df$treatment) & df$treatment == arm]
      }
      describe_responses(answers)
    },
    character(18),
    USE.NAMES = FALSE
  )

  colnames(total_output) <- c("Control", "Reminder", "Tactics", "Emotion", "Combo", "Overall")
  rownames(total_output) <- c("Number of Characters", "",
                              "Number of Words", "",
                              "Yes Responses", "",
                              "No Responses", "",
                              "Only 'Yes' Responses", "",
                              "Only 'No' Responses", "",
                              "More than 'Yes' Responses", "",
                              "More than 'No' Responses", "",
                              "Other Responses", "")
  print(xtable(total_output, type = "latex", caption = paste0("Descriptive Statistics for ", output_name)), file = paste0("./freetext/", ftc, ".latex"))
}
# Answer length in characters for questions 4 and 5. nchar() is already
# vectorised, so no sapply() is needed (sapply() also attached each answer's
# text as an element name).
df$num_characters_4 <- nchar(df$reflective_4)
df$num_characters_5 <- nchar(df$reflective_5)

# One-sided Welch t-tests: are answers in the emotion arm longer than in the
# reminder arm (control_alternative)?
test_4 <- t.test(df$num_characters_4[df$treatment == "emotion"], df$num_characters_4[df$treatment == "control_alternative"], alternative = "greater")
test_5 <- t.test(df$num_characters_5[df$treatment == "emotion"], df$num_characters_5[df$treatment == "control_alternative"], alternative = "greater")


# Difference in mean length (emotion minus reminder) and its standard error.
paste0("diff: ", round(test_4$estimate[1] - test_4$estimate[2], 1), " se: ", round(test_4$stderr, 2))
## [1] "diff: 3.3 se: 1.82"
paste0("diff: ", round(test_5$estimate[1] - test_5$estimate[2], 1), " se: ", round(test_5$stderr, 2))
## [1] "diff: 2.9 se: 1.91"

Reflective Question 4

Has the Inoculation against Misinformation course changed your behavior on social media? If so, how?

Heuristics

Current Heuristics (contain any of the following keywords): stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|(tell&difference)|spot|confirm|confirmed|researched|analyzed|before|(tell&between)|ask myself|asked myself

# Keyword heuristic for Question 4: an answer is flagged (1) when it contains
# any of the alternatives below, otherwise 0.
# NOTE(review): grepl() is case-sensitive here and matches substrings, so
# "Stop" at the start of a sentence is not flagged while "thinking" is --
# confirm this is intended.
keyword_pattern_q4 <- "stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|tell.*difference|spot|confirm|confirmed|researched|analyzed|before|tell.*between|ask myself|asked myself"

df_4 <- df[, c("treatment", "reflective_4")]
df_4$contain_course_info <- as.integer(grepl(keyword_pattern_q4, df_4$reflective_4))

# Share and count of flagged answers in each treatment arm.
df_4 %>%
  group_by(treatment) %>%
  summarise(
    percentage_mentioned = mean(contain_course_info),
    count_mentioned = sum(contain_course_info),
    total_in_group = n()
  )

# Flagged answers first; the answer column is headed by the question wording.
flagged_first <- order(df_4$contain_course_info, decreasing = TRUE)
output <- df_4[flagged_first, c("treatment", "contain_course_info", "reflective_4")]
colnames(output) <- c("treatment_group", "contain_course_keyword", reflective_questions[4])
write.csv(output, "./freetext/reflective_questions_4_heuristics.csv")

Contain Heuristics

# Interactive table of the answers flagged by the heuristic (column 3 is the answer text).
contains_keyword <- output$contain_course_keyword == 1
datatable(output[contains_keyword, 3])

Does not contain heuristics

# Interactive table of the answers not flagged by the heuristic (column 3 is the answer text).
lacks_keyword <- output$contain_course_keyword == 0
datatable(output[lacks_keyword, 3])

Conduct Hypothesis Test

  1. All treatment courses aggregated vs reminder
  2. No-course control vs reminder
  3. Emotion vs reminder
  4. Tactics vs reminder
  5. Combo vs reminder
# 0/1 keyword indicator for the respondents assigned to any of `groups`,
# returned as a plain vector (t.test() is documented to take numeric vectors,
# not one-column data frames).
keyword_flags <- function(data, groups) {
  data$contain_course_keyword[data$treatment_group %in% groups]
}

# Every comparison is one-sided (first group > reminder) against the reminder
# arm, `control_alternative`.
reminder_flags_q4 <- keyword_flags(output, "control_alternative")

test_1_q4 <- t.test(keyword_flags(output, c("emotion", "tactics", "combo")),
                    reminder_flags_q4, alternative = "greater")
test_2_q4 <- t.test(keyword_flags(output, "control_delayed"),
                    reminder_flags_q4, alternative = "greater")
test_3_q4 <- t.test(keyword_flags(output, "emotion"),
                    reminder_flags_q4, alternative = "greater")
test_4_q4 <- t.test(keyword_flags(output, "tactics"),
                    reminder_flags_q4, alternative = "greater")
test_5_q4 <- t.test(keyword_flags(output, "combo"),
                    reminder_flags_q4, alternative = "greater")

tests_q4 <- list(test_1_q4, test_2_q4, test_3_q4, test_4_q4, test_5_q4)

# One row per test: difference in means (first group minus reminder), its
# standard error, the confidence bounds reported by t.test(), the t statistic
# and the p-value.
# Test 2 compares control_delayed with the reminder arm, so its row is labelled
# as the no-course control (it was previously labelled "Control (Combo)").
results_q4 <- data.frame(
  estimates = vapply(tests_q4, function(tt) unname(tt$estimate[1] - tt$estimate[2]), numeric(1)),
  std.err = vapply(tests_q4, function(tt) tt$stderr, numeric(1)),
  CI_lw = vapply(tests_q4, function(tt) tt$conf.int[1], numeric(1)),
  CI_up = vapply(tests_q4, function(tt) tt$conf.int[2], numeric(1)),
  ts = vapply(tests_q4, function(tt) unname(tt$statistic), numeric(1)),
  p_val = vapply(tests_q4, function(tt) tt$p.value, numeric(1)),
  row.names = c("Test 1 - All Treatment Courses v. Reminder",
                "Test 2 - No-course Control v. Reminder",
                "Test 3 - Emotion v. Reminder",
                "Test 4 - Tactics v. Reminder",
                "Test 5 - Combo v. Reminder")
)
# Holm adjustment across the five tests.
results_q4$p_val_holm <- p.adjust(results_q4$p_val, method = "holm")

results_q4

Plot

# Bar chart of group means with error bars, value labels, and four hard-coded
# comparison brackets annotated with test estimates.
#
# Arguments:
#   data      data frame with columns `label` (x axis), `mean` (bar height) and
#             `sd` (error bars are drawn at mean +/- 1.96 * sd). Bars appear in
#             the row order of `data`.
#   color     fill colour(s) passed to geom_bar().
#   ylab,xlab axis titles.
#   y_min,y_max limits of the y axis.
#   title     plot title.
#   num_size  text size of the value labels above the bars.
#   baseline  if TRUE, each bar is labelled with its mean and, below it, the
#             mean as a percentage of the second row's mean; if FALSE only the
#             mean is printed.
#   tests     object with `estimates` and `std.err`; elements 1-4 are printed
#             as "estimate\n(std.err)" at fixed positions: element 1 between
#             bars 1 and 2, element 2 under the bracket joining bars 2 and 3,
#             element 3 under the bracket joining bars 2 and 4, element 4 under
#             the bracket joining bars 2 and 5.
#
# Returns the ggplot object.
#
# The bracket and annotation coordinates are constants, so the layout assumes
# five bars and a y axis that covers roughly 0 to 1.
plot_gen = function(data,color,ylab,xlab,y_min,y_max,title,num_size,baseline,tests){
    
# Base layers: minimal theme, bars, and mean +/- 1.96 * sd error bars.
pic = ggplot(data=data,aes(x=label,y=mean))+
        theme_bw()+
        theme(axis.line.y = element_line(colour="black"),panel.border = element_blank(), 
              panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        legend.title = element_blank(),      
        legend.position = c(.9,.8),legend.justification = c("right", "bottom"), 
        legend.key = element_rect(colour = "transparent"),
        legend.box.just = "right", legend.text = element_text(size=12), legend.margin = margin(6, 6, 6, 6),
        legend.box.background = element_rect( fill="transparent", size=1),legend.background = element_blank()) +
    
        geom_bar(stat='identity',width=0.75,color="black",fill=color) +
        geom_errorbar(aes(ymin=mean-1.96*sd, ymax = mean+1.96*sd),width=0.1, size=1.5, position = position_dodge(1)) 
    
        # With a baseline: print the mean and, beneath it, the mean relative to
        # the second row of `data` as a percentage.
        if (baseline){
        
        pic = pic + geom_text(aes(label=as.character(formatC(mean,digits=3,format="f")),y=mean+sign(mean)*sd),vjust = -2.5,size=num_size)+
        geom_text(aes(label=paste("[",as.character(round(mean/mean[2]*100)),"%]",sep=""),y=mean+sign(mean)*sd),vjust = -1,size=num_size-1)}
    
        # Without a baseline: print the mean only.
        if (!baseline){
            
        pic = pic + geom_text(aes(label=as.character(formatC(mean,digits=3,format="f")),y=mean+sd),vjust = -1,size=num_size)    
        }
    
               
        
        pic = pic + geom_hline(yintercept = 0)+

        # Bracket joining bar 2 and bar 5.
        geom_segment(aes(x = 2, y = 0.92, xend = 5, yend = 0.92), color = "orange2",size=2)+
        geom_segment(aes(x = 2, y = 0.92, xend = 2, yend = 0.72), color = "orange2",size=2)+
        geom_segment(aes(x = 5, y = 0.92, xend = 5, yend = 0.60), color = "orange2",size=2)+
    
        # Bracket joining bar 2 and bar 4.
        geom_segment(aes(x = 2, y = 0.82, xend = 4, yend = 0.82), color = "pink2",size=2)+
        geom_segment(aes(x = 2, y = 0.82, xend = 2, yend = 0.72), color = "pink2",size=2)+
        geom_segment(aes(x = 4, y = 0.82, xend = 4, yend = 0.56), color = "pink2",size=2)+
    
        # Bracket joining bar 2 and bar 3 (the middle segment has zero length).
        geom_segment(aes(x = 2, y = 0.72, xend = 3, yend = 0.72), color = "turquoise2",size=2)+
        geom_segment(aes(x = 2, y = 0.72, xend = 2, yend = 0.72), color = "turquoise2",size=2)+
        geom_segment(aes(x = 3, y = 0.72, xend = 3, yend = 0.62), color = "turquoise2",size=2)+
    
        # Bracket joining bar 1 and bar 2.
        geom_segment(aes(x = 1, y = 0.85, xend = 2, yend = 0.85), color = "gray70",size=2)+
        geom_segment(aes(x = 1, y = 0.85, xend = 1, yend = 0.68), color = "gray70",size=2)+
        geom_segment(aes(x = 2, y = 0.85, xend = 2, yend = 0.68), color = "gray70",size=2)+

    
        # Estimate and standard error printed just below each bracket; tests
        # element k goes with the bracket ending at bar k + 1.
        ggplot2::annotate("text",x=4.5,y=0.875,label = paste0(tests$estimates[4], "\n(",tests$std.err[4],")"),size=11) +
        ggplot2::annotate("text",x=3.5,y=0.775,label = paste0(tests$estimates[3], "\n(",tests$std.err[3],")"),size=11) +    
        ggplot2::annotate("text",x=2.5,y=0.675,label = paste0(tests$estimates[2], "\n(",tests$std.err[2],")"),size=11) +
        ggplot2::annotate("text",x=1.5,y=0.805,label = paste0(tests$estimates[1], "\n(",tests$std.err[1],")"),size=11) +
    
        #annotate("text",x=4.25,y=1.02,label = "Diff:\n",size=11) +
        #annotate("text",x=3.25,y=0.92,label = "Diff:\n",size=11) +    
        #annotate("text",x=2.25,y=0.82,label = "Diff:\n",size=11) +
        #annotate("text",x=1.25,y=0.95,label = "Diff:\n",size=11) +

    
       # Axis titles, large text sizes, y limits, and x order taken from `data`.
       labs(y=ylab,x=xlab)+
        theme(axis.text.x = element_text(color = "black", size = 40, angle = 0, hjust = .5, vjust = 5, face = "plain"),
        axis.text.y = element_text(color = "black", size = 45, angle = 0, hjust = 0, vjust = .5, face = "plain",
                                   margin=unit(rep(0.5,4),"cm")),  
        axis.title.x = element_text(color = "black", size = 55, angle = 0, hjust = .5, vjust = 3, face = "bold"),
        axis.title.y = element_text(color = "black", size = 60, angle = 90, hjust = .5, vjust = .5, face = "bold"),
        axis.ticks.length.y = unit(-0.25,"cm"), axis.ticks.x=element_blank())+
        scale_y_continuous(limits = c(y_min,y_max)) +
        scale_x_discrete(limits = data$label) +

    
        ggtitle(title) + 
          theme(plot.title = element_text(face='bold', size=50, hjust=0.5, vjust=0.5))
        return(pic)
}
# Per-arm mean and standard error of the Question 4 keyword indicator.
plot_q4_data <- data.frame(matrix(NA, 5, 3))
colnames(plot_q4_data) <- c("label", "mean", "sd")

plot_q4_data[, 1:2] <- aggregate(contain_course_keyword ~ treatment_group, output, mean)
# plot_gen() reads this column as `sd`; the value stored is the standard error.
plot_q4_data[, 3] <- aggregate(contain_course_keyword ~ treatment_group, output, std.error)[, 2]


# Display labels for the x axis.
plot_q4_data$label[plot_q4_data$label == "control_delayed"] <- "No-course Control"
plot_q4_data$label[plot_q4_data$label == "control_alternative"] <- "Reminder Control"
plot_q4_data$label[plot_q4_data$label == "tactics"] <- "Info"
plot_q4_data$label[plot_q4_data$label == "emotion"] <- "Emotions"
plot_q4_data$label[plot_q4_data$label == "combo"] <- "Combo"

# Bar order on the x axis.
plot_q4_data <- plot_q4_data %>%
  slice(match(c("No-course Control", "Reminder Control",  "Info", "Emotions", "Combo"), label))

# plot_gen() prints tests rows 1-4 beside the brackets that join the Reminder
# bar to No-course Control, Info, Emotions and Combo, in that order. results_q4
# lists Emotion (row 3) before Tactics/Info (row 4), so the rows are taken as
# 2, 4, 3, 5; using 2:5 put the Emotion estimate on the Info bracket and vice
# versa.
tests_plot_q4 <- round(results_q4[c(2, 4, 3, 5), c("estimates", "std.err")], 3)


plot_gen(plot_q4_data,c("gray70","royalblue3","turquoise2","pink2","orange2"),"Proportion of Participant Responses containing Keywords","Assignment group",-0.0003,1,"",5,TRUE, tests_plot_q4)

WordCloud

# Word cloud built from all Question 4 answers.
vector_wc <- df_4$reflective_4

# Build the corpus from the answer vector.
docs <- Corpus(VectorSource(vector_wc))

# Text clean-up, applied in this order: digits, punctuation, repeated
# whitespace, lower-casing, English stop words.
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))

# Total count of each term over the whole corpus, most frequent first.
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Draw up to 200 terms, most frequent placed first.
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# dataset_names <- list('Contain' = output[output$contain_course_keyword == 1, ], 
#                       'Does not contain' = output[output$contain_course_keyword == 0, ])
# openxlsx::write.xlsx(dataset_names,  paste0("./freetext/reflective_questions_4_heuristics.xlsx"))

Reflective Question 5

If you were to tell a friend what you learned in the course, what tip would you share?

Heuristics

Current Heuristics (contain any of the following keywords): stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|(tell&difference)|spot|confirm|confirmed|researched|analyzed|before|(tell&between)|ask myself|asked myself

# Keyword heuristic for Question 5: an answer is flagged (1) when it contains
# any of the alternatives below, otherwise 0.
# NOTE(review): grepl() is case-sensitive here and matches substrings, so
# "Stop" at the start of a sentence is not flagged while "thinking" is --
# confirm this is intended.
keyword_pattern_q5 <- "stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|tell.*difference|spot|confirm|confirmed|researched|analyzed|before|tell.*between|ask myself|asked myself"

df_5 <- df[, c("treatment", "reflective_5")]
df_5$contain_course_info <- as.integer(grepl(keyword_pattern_q5, df_5$reflective_5))

# Share and count of flagged answers in each treatment arm.
df_5 %>%
  group_by(treatment) %>%
  summarise(
    percentage_mentioned = mean(contain_course_info),
    count_mentioned = sum(contain_course_info),
    total_in_group = n()
  )

# Flagged answers first; the answer column is headed by the question wording.
flagged_first <- order(df_5$contain_course_info, decreasing = TRUE)
output <- df_5[flagged_first, c("treatment", "contain_course_info", "reflective_5")]
colnames(output) <- c("treatment_group", "contain_course_keyword", reflective_questions[5])
# write.csv(output, paste0("./freetext/reflective_questions_5_heuristics.csv"))

# library(openxlsx)
# dataset_names <- list('Contain' = output[output$contain_course_keyword == 1, ], 
#                       'Does not contain' = output[output$contain_course_keyword == 0, ])
# openxlsx::write.xlsx(dataset_names,  paste0("./freetext/reflective_questions_5_heuristics.xlsx"))

Contain Heuristics

# Interactive table of the answers flagged by the heuristic (column 3 is the answer text).
contains_keyword <- output$contain_course_keyword == 1
datatable(output[contains_keyword, 3])

Does not contain heuristics

# Interactive table of the answers not flagged by the heuristic (column 3 is the answer text).
lacks_keyword <- output$contain_course_keyword == 0
datatable(output[lacks_keyword, 3])

Conduct Hypothesis Test

  1. All treatment courses aggregated vs reminder
  2. No-course control vs reminder
  3. Emotion vs reminder
  4. Tactics vs reminder
  5. Combo vs reminder
# 0/1 keyword indicator for the respondents assigned to any of `groups`,
# returned as a plain vector (t.test() is documented to take numeric vectors,
# not one-column data frames).
keyword_flags <- function(data, groups) {
  data$contain_course_keyword[data$treatment_group %in% groups]
}

# Every comparison is one-sided (first group > reminder) against the reminder
# arm, `control_alternative`.
reminder_flags_q5 <- keyword_flags(output, "control_alternative")

test_1_q5 <- t.test(keyword_flags(output, c("emotion", "tactics", "combo")),
                    reminder_flags_q5, alternative = "greater")
test_2_q5 <- t.test(keyword_flags(output, "control_delayed"),
                    reminder_flags_q5, alternative = "greater")
test_3_q5 <- t.test(keyword_flags(output, "emotion"),
                    reminder_flags_q5, alternative = "greater")
test_4_q5 <- t.test(keyword_flags(output, "tactics"),
                    reminder_flags_q5, alternative = "greater")
test_5_q5 <- t.test(keyword_flags(output, "combo"),
                    reminder_flags_q5, alternative = "greater")

tests_q5 <- list(test_1_q5, test_2_q5, test_3_q5, test_4_q5, test_5_q5)

# One row per test: difference in means (first group minus reminder), its
# standard error, the confidence bounds reported by t.test(), the t statistic
# and the p-value.
# Test 2 compares control_delayed with the reminder arm, so its row is labelled
# as the no-course control (it was previously labelled "Control (Combo)").
results_q5 <- data.frame(
  estimates = vapply(tests_q5, function(tt) unname(tt$estimate[1] - tt$estimate[2]), numeric(1)),
  std.err = vapply(tests_q5, function(tt) tt$stderr, numeric(1)),
  CI_lw = vapply(tests_q5, function(tt) tt$conf.int[1], numeric(1)),
  CI_up = vapply(tests_q5, function(tt) tt$conf.int[2], numeric(1)),
  ts = vapply(tests_q5, function(tt) unname(tt$statistic), numeric(1)),
  p_val = vapply(tests_q5, function(tt) tt$p.value, numeric(1)),
  row.names = c("Test 1 - All Treatment Courses v. Reminder",
                "Test 2 - No-course Control v. Reminder",
                "Test 3 - Emotion v. Reminder",
                "Test 4 - Tactics v. Reminder",
                "Test 5 - Combo v. Reminder")
)
# Holm adjustment across the five tests.
results_q5$p_val_holm <- p.adjust(results_q5$p_val, method = "holm")

results_q5

Plot

# Per-arm mean and standard error of the Question 5 keyword indicator.
plot_q5_data <- data.frame(matrix(NA, 5, 3))
colnames(plot_q5_data) <- c("label", "mean", "sd")

plot_q5_data[, 1:2] <- aggregate(contain_course_keyword ~ treatment_group, output, mean)
# plot_gen() reads this column as `sd`; the value stored is the standard error.
plot_q5_data[, 3] <- aggregate(contain_course_keyword ~ treatment_group, output, std.error)[, 2]


# Display labels for the x axis.
plot_q5_data$label[plot_q5_data$label == "control_delayed"] <- "No-course Control"
plot_q5_data$label[plot_q5_data$label == "control_alternative"] <- "Reminder Control"
plot_q5_data$label[plot_q5_data$label == "tactics"] <- "Info"
plot_q5_data$label[plot_q5_data$label == "emotion"] <- "Emotions"
plot_q5_data$label[plot_q5_data$label == "combo"] <- "Combo"

# Bar order on the x axis.
plot_q5_data <- plot_q5_data %>%
  slice(match(c("No-course Control", "Reminder Control",  "Info", "Emotions", "Combo"), label))

# plot_gen() prints tests rows 1-4 beside the brackets that join the Reminder
# bar to No-course Control, Info, Emotions and Combo, in that order. results_q5
# lists Emotion (row 3) before Tactics/Info (row 4), so the rows are taken as
# 2, 4, 3, 5; using 2:5 put the Emotion estimate on the Info bracket and vice
# versa.
tests_plot_q5 <- round(results_q5[c(2, 4, 3, 5), c("estimates", "std.err")], 3)


plot_gen(plot_q5_data,c("gray70","royalblue3","turquoise2","pink2","orange2"),"Proportion of Participant Responses containing Keywords","Assignment group",-0.0003,1,"",5,TRUE, tests_plot_q5)

WordCloud

# Word cloud built from all Question 5 answers.
vector_wc <- df_5$reflective_5

# Build the corpus from the answer vector.
docs <- Corpus(VectorSource(vector_wc))

# Text clean-up, applied in this order: digits, punctuation, repeated
# whitespace, lower-casing, English stop words.
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))

# Total count of each term over the whole corpus, most frequent first.
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Draw up to 200 terms, most frequent placed first.
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 1,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

Comparing Gender

# Load the saved analysis data set.
df_final <- readRDS("df_final.rds")

# Keep the identifier, outcome, treatment, attention-check, free-text and gender
# columns, then drop exact duplicate rows.
# NOTE(review): `accuracy` is part of the distinct() key, so a user with several
# accuracy values keeps several rows -- confirm their answers are not counted
# more than once in the word frequencies below.
df_why <- df_final %>%
  select(
    user, accuracy, treatment, att_check_pre, att_check_post,
    reflective_1, reflective_2, reflective_3, reflective_4, reflective_5,
    gender
  ) %>%
  distinct()

Reflective Question 4

Emotions

WordCloud with Word Frequency >= 25

Man

# Question 4 answers from men in the emotion arm, as a character vector.
# pull() is used instead of select(): VectorSource() makes one document per
# element of its input, so a one-column data frame became a single document
# holding every answer, and bigrams were formed across answer boundaries.
vector_wc <- df_why %>%
  filter(treatment == "emotion", gender == "Man") %>%
  pull(reflective_4)
# Drop missing answers; they were otherwise tokenised as the text "na".
vector_wc <- vector_wc[!is.na(vector_wc)]

# One document per answer.
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer returning every single word followed by every pair of adjacent
# words in a document. as.character() turns the NULL produced by an empty
# document into character(0).
BigramTokenizer <- function(x) {
  tokens <- words(x)
  as.character(c(
    unlist(lapply(ngrams(tokens, 1), paste, collapse = " "), use.names = FALSE),
    unlist(lapply(ngrams(tokens, 2), paste, collapse = " "), use.names = FALSE)
  ))
}

# Total count of each term over all answers, most frequent first.
matrix <- as.matrix(TermDocumentMatrix(docs,  control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)



# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))

# Print the (up to) 20 most frequent terms; head() avoids NA rows when fewer
# than 20 terms exist.
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
yes 343
information 275
na na 203
na yes 148
share 110
media 105
social 102
social media 100
post 97
misinformation 90
sharing 73
posts 72
now 66
misleading 65
changed 63
dont 61
can 55
know 51
true 49
careful 42

Woman

# Question 4 answers from women in the emotion arm, as a character vector.
# pull() is used instead of select(): VectorSource() makes one document per
# element of its input, so a one-column data frame became a single document
# holding every answer, and bigrams were formed across answer boundaries.
vector_wc <- df_why %>%
  filter(treatment == "emotion", gender == "Woman") %>%
  pull(reflective_4)
# Drop missing answers; they were otherwise tokenised as the text "na".
vector_wc <- vector_wc[!is.na(vector_wc)]

# One document per answer.
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer returning every single word followed by every pair of adjacent
# words in a document. as.character() turns the NULL produced by an empty
# document into character(0).
BigramTokenizer <- function(x) {
  tokens <- words(x)
  as.character(c(
    unlist(lapply(ngrams(tokens, 1), paste, collapse = " "), use.names = FALSE),
    unlist(lapply(ngrams(tokens, 2), paste, collapse = " "), use.names = FALSE)
  ))
}

# Total count of each term over all answers, most frequent first.
matrix <- as.matrix(TermDocumentMatrix(docs,  control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)



# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))

# Print the (up to) 20 most frequent terms; head() avoids NA rows when fewer
# than 20 terms exist.
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
yes 207
information 158
na na 103
social 85
media 84
social media 81
na yes 74
post 73
share 73
dont 54
sharing 51
misinformation 47
true 37
believe 33
misleading 33
posts 33
changed 32
know 31
now 30
sure 26

Reasoning

WordCloud with Word Frequency >= 25

Man

# Question 4 answers from men in the tactics arm, as a character vector.
# pull() is used instead of select(): VectorSource() makes one document per
# element of its input, so a one-column data frame became a single document
# holding every answer, and bigrams were formed across answer boundaries.
vector_wc <- df_why %>%
  filter(treatment == "tactics", gender == "Man") %>%
  pull(reflective_4)
# Drop missing answers; they were otherwise tokenised as the text "na".
vector_wc <- vector_wc[!is.na(vector_wc)]

# One document per answer.
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer returning every single word followed by every pair of adjacent
# words in a document. as.character() turns the NULL produced by an empty
# document into character(0).
BigramTokenizer <- function(x) {
  tokens <- words(x)
  as.character(c(
    unlist(lapply(ngrams(tokens, 1), paste, collapse = " "), use.names = FALSE),
    unlist(lapply(ngrams(tokens, 2), paste, collapse = " "), use.names = FALSE)
  ))
}

# Total count of each term over all answers, most frequent first.
matrix <- as.matrix(TermDocumentMatrix(docs,  control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)



# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))

# Print the (up to) 20 most frequent terms; head() avoids NA rows when fewer
# than 20 terms exist.
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
yes 356
information 289
na na 164
na yes 133
social 130
media 128
social media 125
post 84
posts 80
now 77
share 76
misinformation 74
sharing 69
misleading 68
can 59
keen 56
dont 54
changed 52
careful 49
able 44

Woman

# Question 4 answers from women in the tactics arm, as a character vector.
# pull() is used instead of select(): VectorSource() makes one document per
# element of its input, so a one-column data frame became a single document
# holding every answer, and bigrams were formed across answer boundaries.
vector_wc <- df_why %>%
  filter(treatment == "tactics", gender == "Woman") %>%
  pull(reflective_4)
# Drop missing answers; they were otherwise tokenised as the text "na".
vector_wc <- vector_wc[!is.na(vector_wc)]

# One document per answer.
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <-
  docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer returning every single word followed by every pair of adjacent
# words in a document. as.character() turns the NULL produced by an empty
# document into character(0).
BigramTokenizer <- function(x) {
  tokens <- words(x)
  as.character(c(
    unlist(lapply(ngrams(tokens, 1), paste, collapse = " "), use.names = FALSE),
    unlist(lapply(ngrams(tokens, 2), paste, collapse = " "), use.names = FALSE)
  ))
}

# Total count of each term over all answers, most frequent first.
matrix <- as.matrix(TermDocumentMatrix(docs,  control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)



# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))

# Print the (up to) 20 most frequent terms; head() avoids NA rows when fewer
# than 20 terms exist.
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
yes 183
information 133
na na 95
post 66
na yes 63
media 60
social 59
social media 58
dont 44
share 41
misleading 40
misinformation 38
posts 37
now 34
true 30
know 28
sharing 27
sure 27
changed 26
yesi 26

Combo

WordCloud with Word Frequency >= 25

Man

# Reflective question 4, "combo" arm, men.
# pull() returns the answers as a plain vector so that VectorSource() builds one
# document per response. Handing it the one-column data frame from select()
# makes the whole column a single document, which is why missing answers showed
# up as "na" tokens and bigrams paired words from neighbouring responses.
vector_wc <- df_why %>%
  filter(treatment == "combo", gender == "Man") %>%
  pull(reflective_4)
vector_wc <- as.character(vector_wc[!is.na(vector_wc)])

# Create corpus (one document per response)
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer: every unigram of a document followed by every bigram.
# vapply() keeps the result a character vector even for an empty document.
BigramTokenizer <- function(x) {
  tokens <- words(x)
  c(
    vapply(ngrams(tokens, 1), paste, character(1), collapse = " "),
    vapply(ngrams(tokens, 2), paste, character(1), collapse = " ")
  )
}

# Create term-document matrix and total each term over all responses
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 25,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Print the 20 most frequent terms (head() avoids NA padding on short tables)
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
yes 385
information 260
na na 167
na yes 160
post 127
media 120
social 120
social media 117
share 107
misleading 91
posts 90
sharing 78
dont 74
now 71
misinformation 64
can 53
changed 49
misleading information 46
made 42
research 38

Woman

# Reflective question 4, "combo" arm, women.
# pull() returns the answers as a plain vector so that VectorSource() builds one
# document per response. Handing it the one-column data frame from select()
# makes the whole column a single document, which is why missing answers showed
# up as "na" tokens and bigrams paired words from neighbouring responses.
vector_wc <- df_why %>%
  filter(treatment == "combo", gender == "Woman") %>%
  pull(reflective_4)
vector_wc <- as.character(vector_wc[!is.na(vector_wc)])

# Create corpus (one document per response)
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer: every unigram of a document followed by every bigram.
# vapply() keeps the result a character vector even for an empty document.
BigramTokenizer <- function(x) {
  tokens <- words(x)
  c(
    vapply(ngrams(tokens, 1), paste, character(1), collapse = " "),
    vapply(ngrams(tokens, 2), paste, character(1), collapse = " ")
  )
}

# Create term-document matrix and total each term over all responses
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 25,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Print the 20 most frequent terms (head() avoids NA padding on short tables)
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
yes 177
information 144
na na 95
media 69
na yes 69
social 69
social media 66
share 64
post 56
dont 55
misleading 43
sharing 40
misinformation 39
now 38
posts 37
sure 33
keen 30
see 29
believe 26
yesi 26

Reflective Question 5

Emotions

WordCloud with Word Frequency >= 25

Man

# Reflective question 5, "emotion" arm, men.
# pull() returns the answers as a plain vector so that VectorSource() builds one
# document per response. Handing it the one-column data frame from select()
# makes the whole column a single document, which is why missing answers showed
# up as "na" tokens and bigrams paired words from neighbouring responses.
vector_wc <- df_why %>%
  filter(treatment == "emotion", gender == "Man") %>%
  pull(reflective_5)
vector_wc <- as.character(vector_wc[!is.na(vector_wc)])

# Create corpus (one document per response)
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer: every unigram of a document followed by every bigram.
# vapply() keeps the result a character vector even for an empty document.
BigramTokenizer <- function(x) {
  tokens <- words(x)
  c(
    vapply(ngrams(tokens, 1), paste, character(1), collapse = " "),
    vapply(ngrams(tokens, 2), paste, character(1), collapse = " ")
  )
}

# Create term-document matrix and total each term over all responses
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 25,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Print the 20 most frequent terms (head() avoids NA padding on short tables)
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
information 347
na na 189
misinformation 188
media 184
social 181
social media 173
sharing 120
misleading 117
share 116
post 100
always 93
misleading information 56
posts 56
true 52
careful 48
avoid 45
sure 44
disinformation 41
dont 40
misinformation na 38

Woman

# Reflective question 5, "emotion" arm, women.
# pull() returns the answers as a plain vector so that VectorSource() builds one
# document per response. Handing it the one-column data frame from select()
# makes the whole column a single document, which is why missing answers showed
# up as "na" tokens and bigrams paired words from neighbouring responses.
vector_wc <- df_why %>%
  filter(treatment == "emotion", gender == "Woman") %>%
  pull(reflective_5)
vector_wc <- as.character(vector_wc[!is.na(vector_wc)])

# Create corpus (one document per response)
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer: every unigram of a document followed by every bigram.
# vapply() keeps the result a character vector even for an empty document.
BigramTokenizer <- function(x) {
  tokens <- words(x)
  c(
    vapply(ngrams(tokens, 1), paste, character(1), collapse = " "),
    vapply(ngrams(tokens, 2), paste, character(1), collapse = " ")
  )
}

# Create term-document matrix and total each term over all responses
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 25,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Print the 20 most frequent terms (head() avoids NA padding on short tables)
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
information 182
media 137
social 135
social media 133
misinformation 103
na na 103
share 80
sharing 67
misleading 59
post 59
true 47
posts 43
always 41
careful 31
believe 30
sure 30
misleading information 29
dont 28
never 28
see 28

Reasoning

WordCloud with Word Frequency >= 25

Man

# Reflective question 5, "tactics" (Reasoning) arm, men.
# pull() returns the answers as a plain vector so that VectorSource() builds one
# document per response. Handing it the one-column data frame from select()
# makes the whole column a single document, which is why missing answers showed
# up as "na" tokens and bigrams paired words from neighbouring responses.
vector_wc <- df_why %>%
  filter(treatment == "tactics", gender == "Man") %>%
  pull(reflective_5)
vector_wc <- as.character(vector_wc[!is.na(vector_wc)])

# Create corpus (one document per response)
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer: every unigram of a document followed by every bigram.
# vapply() keeps the result a character vector even for an empty document.
BigramTokenizer <- function(x) {
  tokens <- words(x)
  c(
    vapply(ngrams(tokens, 1), paste, character(1), collapse = " "),
    vapply(ngrams(tokens, 2), paste, character(1), collapse = " ")
  )
}

# Create term-document matrix and total each term over all responses
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 25,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Print the 20 most frequent terms (head() avoids NA padding on short tables)
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
information 358
social 194
media 192
social media 186
misinformation 161
na na 158
misleading 131
share 98
sharing 88
misleading information 77
always 76
post 70
posts 63
true 60
careful 50
check 47
people 41
source 41
keen 38
avoid 36

Woman

# Reflective question 5, "tactics" (Reasoning) arm, women.
# pull() returns the answers as a plain vector so that VectorSource() builds one
# document per response. Handing it the one-column data frame from select()
# makes the whole column a single document, which is why missing answers showed
# up as "na" tokens and bigrams paired words from neighbouring responses.
vector_wc <- df_why %>%
  filter(treatment == "tactics", gender == "Woman") %>%
  pull(reflective_5)
vector_wc <- as.character(vector_wc[!is.na(vector_wc)])

# Create corpus (one document per response)
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer: every unigram of a document followed by every bigram.
# vapply() keeps the result a character vector even for an empty document.
BigramTokenizer <- function(x) {
  tokens <- words(x)
  c(
    vapply(ngrams(tokens, 1), paste, character(1), collapse = " "),
    vapply(ngrams(tokens, 2), paste, character(1), collapse = " ")
  )
}

# Create term-document matrix and total each term over all responses
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 25,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Print the 20 most frequent terms (head() avoids NA padding on short tables)
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
information 144
media 113
social 113
social media 110
misinformation 94
na na 94
misleading 73
post 49
share 43
posts 41
true 38
sharing 34
always 33
see 31
misleading information 30
careful 28
everything 28
keen 28
tell 28
sure 25

Combo

WordCloud with Word Frequency >= 25

Man

# Reflective question 5, "combo" arm, men.
# pull() returns the answers as a plain vector so that VectorSource() builds one
# document per response. Handing it the one-column data frame from select()
# makes the whole column a single document, which is why missing answers showed
# up as "na" tokens and bigrams paired words from neighbouring responses.
vector_wc <- df_why %>%
  filter(treatment == "combo", gender == "Man") %>%
  pull(reflective_5)
vector_wc <- as.character(vector_wc[!is.na(vector_wc)])

# Create corpus (one document per response)
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer: every unigram of a document followed by every bigram.
# vapply() keeps the result a character vector even for an empty document.
BigramTokenizer <- function(x) {
  tokens <- words(x)
  c(
    vapply(ngrams(tokens, 1), paste, character(1), collapse = " "),
    vapply(ngrams(tokens, 2), paste, character(1), collapse = " ")
  )
}

# Create term-document matrix and total each term over all responses
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 25,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Print the 20 most frequent terms (head() avoids NA padding on short tables)
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
information 344
media 184
social 181
social media 175
misinformation 160
na na 157
misleading 128
share 128
post 104
sharing 83
posts 76
always 75
misleading information 70
careful 54
dont 49
true 43
check 41
sure 39
tell 35
see 34

Woman

# Reflective question 5, "combo" arm, women.
# pull() returns the answers as a plain vector so that VectorSource() builds one
# document per response. Handing it the one-column data frame from select()
# makes the whole column a single document, which is why missing answers showed
# up as "na" tokens and bigrams paired words from neighbouring responses.
vector_wc <- df_why %>%
  filter(treatment == "combo", gender == "Woman") %>%
  pull(reflective_5)
vector_wc <- as.character(vector_wc[!is.na(vector_wc)])

# Create corpus (one document per response)
docs <- VCorpus(VectorSource(vector_wc))

# Clean corpus
docs <- docs %>%
  tm_map(removeNumbers) %>%
  tm_map(removePunctuation) %>%
  tm_map(stripWhitespace) %>%
  tm_map(content_transformer(tolower)) %>%
  tm_map(removeWords, stopwords("english"))

# Tokenizer: every unigram of a document followed by every bigram.
# vapply() keeps the result a character vector even for an empty document.
BigramTokenizer <- function(x) {
  tokens <- words(x)
  c(
    vapply(ngrams(tokens, 1), paste, character(1), collapse = " "),
    vapply(ngrams(tokens, 2), paste, character(1), collapse = " ")
  )
}

# Create term-document matrix and total each term over all responses
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)

# Create wordcloud
wordcloud(
  words = df_freetext$word,
  freq = df_freetext$freq,
  min.freq = 25,
  max.words = 200,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)

# Print the 20 most frequent terms (head() avoids NA padding on short tables)
row.names(df_freetext) <- seq_len(nrow(df_freetext))
as.matrix(head(df_freetext, 20)) %>% kable(format = 'pipe') %>% print()
word freq
information 162
social 99
media 96
misinformation 96
social media 92
na na 88
share 73
misleading 60
sharing 57
always 54
post 45
posts 44
true 41
careful 30
sure 30
misleading information 28
people 26
dont 24
avoid 23
know 23

Hypothesis Testing

Contain Course Keyword

Current Heuristics (contain any of the following keywords): stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|(tell&difference)|spot|confirm|confirmed|researched|analyzed|before|(tell&between)|ask myself|asked myself

# Wording of the five reflective questions; used to label the free-text column
# of `output`.
reflective_questions <- c("What are some techniques that people use to create misleading social media posts?",
                          "When browsing your timeline in the last month, did you notice any post that looked misleading? If so, what made it seem misleading?",
                          "How did you feel when you saw the misleading post? If you haven't seen any misleading posts recently, how do you think you would feel?",
                          "Has the Inoculation against Misinformation course changed your behavior on social media? If so, how?",
                          "If you were to tell a friend what you learned in the course, what tip would you share?")

# Question 4: flag answers that contain at least one course keyword.
df_4 <- df_why[df_why$gender != "Other", c("treatment", "gender", "reflective_4")]
course_keyword_regex <- "stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|tell.*difference|spot|confirm|confirmed|researched|analyzed|before|tell.*between|ask myself|asked myself"
# ignore.case = TRUE so that capitalised keywords (e.g. an answer starting with
# "Check" or "Stop") are counted; grepl() is case-sensitive by default.
# Missing answers yield FALSE and are therefore coded 0.
df_4$contain_course_info <- as.integer(
  grepl(course_keyword_regex, df_4$reflective_4, ignore.case = TRUE)
)

# Share and count of keyword mentions per treatment arm and gender
df_4 %>%
  group_by(treatment, gender) %>%
  summarise(
    percentage_mentioned = mean(contain_course_info),
    count_mentioned = sum(contain_course_info),
    total_in_group = n(),
    .groups = "drop"
  ) %>%
  filter(treatment %in% c("combo", "emotion", "tactics"))

# Answers sorted so that keyword-containing responses come first
output <- df_4[order(df_4$contain_course_info, decreasing = TRUE), c("treatment", "gender", "contain_course_info", "reflective_4")]
colnames(output) <- c("treatment_group", "gender", "contain_course_keyword", reflective_questions[4])

# One-sided Welch t-tests (H1: men mention keywords more often than women),
# one per treatment arm. pull() passes plain vectors to t.test().
test_1_q4 <- t.test(
  output %>% filter(treatment_group == "emotion", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "emotion", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "greater"
)

test_2_q4 <- t.test(
  output %>% filter(treatment_group == "tactics", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "tactics", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "greater"
)

test_3_q4 <- t.test(
  output %>% filter(treatment_group == "combo", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "combo", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "greater"
)

# Collect the three tests into one table. Because the tests are one-sided, one
# end of each confidence interval is infinite.
tests_q4 <- list(test_1_q4, test_2_q4, test_3_q4)
results_q4 <- data.frame(
  estimates = vapply(tests_q4, function(tt) unname(tt$estimate[1] - tt$estimate[2]), numeric(1)),
  std.err = vapply(tests_q4, function(tt) tt$stderr, numeric(1)),
  CI_lw = vapply(tests_q4, function(tt) tt$conf.int[1], numeric(1)),
  CI_up = vapply(tests_q4, function(tt) tt$conf.int[2], numeric(1)),
  ts = vapply(tests_q4, function(tt) unname(tt$statistic), numeric(1)),
  p_val = vapply(tests_q4, function(tt) tt$p.value, numeric(1)),
  row.names = c("Test 1 - Man vs Woman (Emotion)",
                "Test 2 - Man vs Woman (Reasoning)",
                "Test 3 - Man vs Woman (Combo)")
)
# Holm correction across the three arm-level tests
results_q4$p_val_holm <- p.adjust(results_q4$p_val, "holm")

results_q4
# Question 5: flag answers that contain at least one course keyword.
df_5 <- df_why[df_why$gender != "Other", c("treatment", "gender", "reflective_5")]
course_keyword_regex <- "stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|tell.*difference|spot|confirm|confirmed|researched|analyzed|before|tell.*between|ask myself|asked myself"
# ignore.case = TRUE so that capitalised keywords (e.g. an answer starting with
# "Check" or "Stop") are counted; grepl() is case-sensitive by default.
# Missing answers yield FALSE and are therefore coded 0.
df_5$contain_course_info <- as.integer(
  grepl(course_keyword_regex, df_5$reflective_5, ignore.case = TRUE)
)

# Share and count of keyword mentions per treatment arm and gender
df_5 %>%
  group_by(treatment, gender) %>%
  summarise(
    percentage_mentioned = mean(contain_course_info),
    count_mentioned = sum(contain_course_info),
    total_in_group = n(),
    .groups = "drop"
  ) %>%
  filter(treatment %in% c("combo", "emotion", "tactics"))

# Answers sorted so that keyword-containing responses come first
output <- df_5[order(df_5$contain_course_info, decreasing = TRUE), c("treatment", "gender", "contain_course_info", "reflective_5")]
colnames(output) <- c("treatment_group", "gender", "contain_course_keyword", reflective_questions[5])

# One-sided Welch t-tests (H1: men mention keywords more often than women),
# one per treatment arm. pull() passes plain vectors to t.test().
test_1_q5 <- t.test(
  output %>% filter(treatment_group == "emotion", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "emotion", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "greater"
)

test_2_q5 <- t.test(
  output %>% filter(treatment_group == "tactics", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "tactics", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "greater"
)

test_3_q5 <- t.test(
  output %>% filter(treatment_group == "combo", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "combo", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "greater"
)

# Collect the three tests into one table. Because the tests are one-sided, one
# end of each confidence interval is infinite.
tests_q5 <- list(test_1_q5, test_2_q5, test_3_q5)
results_q5 <- data.frame(
  estimates = vapply(tests_q5, function(tt) unname(tt$estimate[1] - tt$estimate[2]), numeric(1)),
  std.err = vapply(tests_q5, function(tt) tt$stderr, numeric(1)),
  CI_lw = vapply(tests_q5, function(tt) tt$conf.int[1], numeric(1)),
  CI_up = vapply(tests_q5, function(tt) tt$conf.int[2], numeric(1)),
  ts = vapply(tests_q5, function(tt) unname(tt$statistic), numeric(1)),
  p_val = vapply(tests_q5, function(tt) tt$p.value, numeric(1)),
  row.names = c("Test 1 - Man vs Woman (Emotion)",
                "Test 2 - Man vs Woman (Reasoning)",
                "Test 3 - Man vs Woman (Combo)")
)
# Holm correction across the three arm-level tests
results_q5$p_val_holm <- p.adjust(results_q5$p_val, "holm")

results_q5

Contain Information Keyword

Current Heuristics (contain any of the following keywords): misinformation|misleading|disinformation|misinfo|disinfo|accurate|true

# Question 4: flag answers that contain at least one information keyword.
df_4 <- df_why[df_why$gender != "Other", c("treatment", "gender", "reflective_4")]
misinfo_keyword_regex <- "misinformation|misleading|disinformation|misinfo|disinfo|accurate|true"
# ignore.case = TRUE so that capitalised keywords (e.g. an answer starting with
# "Misinformation") are counted; grepl() is case-sensitive by default.
# Missing answers yield FALSE and are therefore coded 0.
df_4$contain_misinfo_keyword <- as.integer(
  grepl(misinfo_keyword_regex, df_4$reflective_4, ignore.case = TRUE)
)

# Share and count of keyword mentions per treatment arm and gender
df_4 %>%
  group_by(treatment, gender) %>%
  summarise(
    percentage_mentioned = mean(contain_misinfo_keyword),
    count_mentioned = sum(contain_misinfo_keyword),
    total_in_group = n(),
    .groups = "drop"
  ) %>%
  filter(treatment %in% c("combo", "emotion", "tactics"))

# Answers sorted so that keyword-containing responses come first.
# NOTE: the indicator column keeps the name "contain_course_keyword" even though
# it now holds the information-keyword flag; the name is left unchanged so that
# any code reading `output` keeps working.
output <- df_4[order(df_4$contain_misinfo_keyword, decreasing = TRUE), c("treatment", "gender", "contain_misinfo_keyword", "reflective_4")]
colnames(output) <- c("treatment_group", "gender", "contain_course_keyword", reflective_questions[4])

# One-sided Welch t-tests (H1: men mention keywords less often than women),
# one per treatment arm. pull() passes plain vectors to t.test().
test_1_q4 <- t.test(
  output %>% filter(treatment_group == "emotion", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "emotion", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "less"
)

test_2_q4 <- t.test(
  output %>% filter(treatment_group == "tactics", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "tactics", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "less"
)

test_3_q4 <- t.test(
  output %>% filter(treatment_group == "combo", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "combo", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "less"
)

# Collect the three tests into one table. Because the tests are one-sided, one
# end of each confidence interval is infinite.
tests_q4 <- list(test_1_q4, test_2_q4, test_3_q4)
results_q4 <- data.frame(
  estimates = vapply(tests_q4, function(tt) unname(tt$estimate[1] - tt$estimate[2]), numeric(1)),
  std.err = vapply(tests_q4, function(tt) tt$stderr, numeric(1)),
  CI_lw = vapply(tests_q4, function(tt) tt$conf.int[1], numeric(1)),
  CI_up = vapply(tests_q4, function(tt) tt$conf.int[2], numeric(1)),
  ts = vapply(tests_q4, function(tt) unname(tt$statistic), numeric(1)),
  p_val = vapply(tests_q4, function(tt) tt$p.value, numeric(1)),
  row.names = c("Test 1 - Man vs Woman (Emotion)",
                "Test 2 - Man vs Woman (Reasoning)",
                "Test 3 - Man vs Woman (Combo)")
)
# Holm correction across the three arm-level tests
results_q4$p_val_holm <- p.adjust(results_q4$p_val, "holm")

results_q4
# Question 5: flag answers that contain at least one information keyword.
df_5 <- df_why[df_why$gender != "Other", c("treatment", "gender", "reflective_5")]
misinfo_keyword_regex <- "misinformation|misleading|disinformation|misinfo|disinfo|accurate|true"
# ignore.case = TRUE so that capitalised keywords (e.g. an answer starting with
# "Misinformation") are counted; grepl() is case-sensitive by default.
# Missing answers yield FALSE and are therefore coded 0.
df_5$contain_misinfo_keyword <- as.integer(
  grepl(misinfo_keyword_regex, df_5$reflective_5, ignore.case = TRUE)
)

# Share and count of keyword mentions per treatment arm and gender
df_5 %>%
  group_by(treatment, gender) %>%
  summarise(
    percentage_mentioned = mean(contain_misinfo_keyword),
    count_mentioned = sum(contain_misinfo_keyword),
    total_in_group = n(),
    .groups = "drop"
  ) %>%
  filter(treatment %in% c("combo", "emotion", "tactics"))

# Answers sorted so that keyword-containing responses come first.
# NOTE: the indicator column keeps the name "contain_course_keyword" even though
# it now holds the information-keyword flag; the name is left unchanged so that
# any code reading `output` keeps working.
output <- df_5[order(df_5$contain_misinfo_keyword, decreasing = TRUE), c("treatment", "gender", "contain_misinfo_keyword", "reflective_5")]
colnames(output) <- c("treatment_group", "gender", "contain_course_keyword", reflective_questions[5])

# One-sided Welch t-tests (H1: men mention keywords less often than women),
# one per treatment arm. pull() passes plain vectors to t.test().
test_1_q5 <- t.test(
  output %>% filter(treatment_group == "emotion", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "emotion", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "less"
)

test_2_q5 <- t.test(
  output %>% filter(treatment_group == "tactics", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "tactics", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "less"
)

test_3_q5 <- t.test(
  output %>% filter(treatment_group == "combo", gender == "Man") %>% pull(contain_course_keyword),
  output %>% filter(treatment_group == "combo", gender == "Woman") %>% pull(contain_course_keyword),
  alternative = "less"
)

# Collect the three tests into one table. Because the tests are one-sided, one
# end of each confidence interval is infinite.
tests_q5 <- list(test_1_q5, test_2_q5, test_3_q5)
results_q5 <- data.frame(
  estimates = vapply(tests_q5, function(tt) unname(tt$estimate[1] - tt$estimate[2]), numeric(1)),
  std.err = vapply(tests_q5, function(tt) tt$stderr, numeric(1)),
  CI_lw = vapply(tests_q5, function(tt) tt$conf.int[1], numeric(1)),
  CI_up = vapply(tests_q5, function(tt) tt$conf.int[2], numeric(1)),
  ts = vapply(tests_q5, function(tt) unname(tt$statistic), numeric(1)),
  p_val = vapply(tests_q5, function(tt) tt$p.value, numeric(1)),
  row.names = c("Test 1 - Man vs Woman (Emotion)",
                "Test 2 - Man vs Woman (Reasoning)",
                "Test 3 - Man vs Woman (Combo)")
)
# Holm correction across the three arm-level tests
results_q5$p_val_holm <- p.adjust(results_q5$p_val, "holm")

results_q5