rm(list = ls())
library(openxlsx)
pacman::p_load(DT, estimatr, kableExtra, readr, reshape2, tidyverse, xtable, dataMaid, ggcorrplot, ggmap, rpart, rpart.plot, pollster, wordcloud, tm, topicmodels, ldatuning, lda, SnowballC, pals, flextable, RColorBrewer, hrbrthemes, janitor, purrr, gridExtra, cowplot, rcompanion, nnet, texreg, compareGroups, factoextra, cluster, fastDummies, simputation, sentimentr, politeness, textir, plotrix)
set.seed(94305)
dir.create(file.path('tables'), showWarnings = FALSE)
dir.create(file.path('figures'), showWarnings = FALSE)
dir.create(file.path('freetext'), showWarnings = FALSE)

library(dplyr)
files <- list.files('./data', full.names = TRUE)
# pre_files <- files[grepl("Main PRE", files)]
# post_files <- files[grepl("Main POST", files)]
# INPUT_FILENAME_PRE <- pre_files[which.max(file.info(pre_files)$mtime)]
# INPUT_FILENAME_POST <- post_files[which.max(file.info(post_files)$mtime)]
# df_full_pre <- read_csv(INPUT_FILENAME_PRE) %>% clean_names %>% remove_empty()
# df_full_post <- read_csv(INPUT_FILENAME_POST) %>% clean_names %>% remove_empty()
followup_files <- files[grepl("Followup", files)]
INPUT_FILENAME_FOLLOWUP <- followup_files[which.max(file.info(followup_files)$mtime)]
df_full_followup <- read_csv(INPUT_FILENAME_FOLLOWUP) %>% clean_names %>% remove_empty()
# df_final <- readRDS("../main_analysis/data/df_final.rds")
df <- df_full_followup %>% filter(finished == "True") %>% filter(consent_course == "Yes, I agree") %>% filter(status == "IP Address")
contact_list <- read.csv("../contact_lists/main/misinfo_followup.csv")
contact_list <- contact_list %>% rename(external_reference = ExternalDataReference)
contact_list$external_reference <- paste0("+", as.character(contact_list$external_reference))
df <- left_join(df, contact_list, by = "external_reference")
# df_post_valid_phone <- unique(df_final$phone_num)
# df_post <- df_full_post[df_full_post$phone_num %in% df_post_valid_phone, ]

# clean_phone_number <- function(phone){
#   # Normalize phone numbers (Kenya's +254 country code) to the bare 9-digit local form.
#   # "O" catches a letter-O typo for "0"; the "±254"/"†254" branches catch mis-encoded "+" prefixes.
#   if (is.na(phone)){
#     return("")
#   }
#   if (((substr(phone, 1, 1) == "0") | (substr(phone, 1, 1) == "O") | (substr(phone, 1, 1) == "+")) & nchar(phone) == 10){
#     phone <- substr(phone, 2, nchar(phone))
#   } else if ((substr(phone, 1, 3) == "254") & nchar(phone) == 12){
#     phone <- substr(phone, 4, nchar(phone))
#   } else if ((substr(phone, 1, 4) == "2540") & nchar(phone) == 13){
#     phone <- substr(phone, 5, nchar(phone))
#   } else if ((substr(phone, 1, 4) == "+254") & nchar(phone) == 13){
#     phone <- substr(phone, 5, nchar(phone))
#   } else if ((substr(phone, 1, 4) == "±254") & nchar(phone) == 13){
#     phone <- substr(phone, 5, nchar(phone))
#   } else if ((substr(phone, 1, 4) == "†254") & nchar(phone) == 13){
#     phone <- substr(phone, 5, nchar(phone))
#   } else if ((substr(phone, 1, 5) == "+2540") & nchar(phone) == 14){
#     phone <- substr(phone, 6, nchar(phone))
#   } else {
#     phone <- ""
#   }
#   return(phone)
# }
# df_post_phone <- sapply(df_post_valid_phone, function(x) clean_phone_number(x))

free_text_columns <- colnames(df)[startsWith(colnames(df), "reflective")]
reflective_questions <- c("What are some techniques that people use to create misleading social media posts?",
"When browsing your timeline in the last month, did you notice any post that looked misleading? If so, what made it seem misleading?",
"How did you feel when you saw the misleading post? If you haven't seen any misleading posts recently, how do you think you would feel?",
"Has the Inoculation against Misinformation course changed your behavior on social media? If so, how?",
"If you were to tell a friend what you learned in the course, what tip would you share?")
# for (i in 1:5){
# output <- df[free_text_columns[i]]
# colnames(output) <- reflective_questions[i]
# write.csv(output, paste0("./freetext/reflective_questions_", i, ".csv"))
# }

options(scipen = 999)
treatments <- c("control_delayed", "control_alternative", "tactics", "emotion", "combo", "overall")
for (ftc in free_text_columns){
question_num <- substr(ftc, 12, 12)
output_name <- paste0("Reflective Question ", question_num)
total_output <- c()
for (t in treatments){
if (t != "overall"){
q <- df %>% filter(treatment == t) %>% select(ftc)
} else {
q <- df %>% select(ftc)
}
num_characters_mean <- mean(sapply(q, function(x) nchar(x)), na.rm = T)
num_characters_se <- std.error(sapply(q, function(x) nchar(x)), na.rm = T)
num_word_mean <- mean(sapply(q, function(x) str_count(x, "\\w+")), na.rm = T)
num_word_se <- std.error(sapply(q, function(x) str_count(x, "\\w+")), na.rm = T)
include_yes_mean <- mean(sapply(q, function(x) grepl("\\byes\\b", x, ignore.case = TRUE)), na.rm = T)
include_yes_se <- std.error(sapply(q, function(x) grepl("\\byes\\b", x, ignore.case = TRUE)), na.rm = T)
include_no_mean <- mean(sapply(q, function(x) grepl("\\bno\\b", x, ignore.case = TRUE) & !grepl("\\byes\\b", x, ignore.case = TRUE)), na.rm = T)
include_no_se <- std.error(sapply(q, function(x) grepl("\\bno\\b", x, ignore.case = TRUE) & !grepl("\\byes\\b", x, ignore.case = TRUE)), na.rm = T)
only_yes_mean <- mean(sapply(q, function(x) grepl("\\byes\\b", x, ignore.case = TRUE) & nchar(x) <= 5), na.rm = T)
only_yes_se <- std.error(sapply(q, function(x) grepl("\\byes\\b", x, ignore.case = TRUE) & nchar(x) <= 5), na.rm = T)
only_no_mean <- mean(sapply(q, function(x) grepl("\\bno\\b", x, ignore.case = TRUE) & !grepl("\\byes\\b", x, ignore.case = TRUE) & nchar(x) <= 5), na.rm = T)
only_no_se <- std.error(sapply(q, function(x) grepl("\\bno\\b", x, ignore.case = TRUE) & !grepl("\\byes\\b", x, ignore.case = TRUE) & nchar(x) <= 5), na.rm = T)
more_than_yes_mean <- mean(sapply(q, function(x) grepl("\\byes\\b", x, ignore.case = TRUE) & nchar(x) > 5), na.rm = T)
more_than_yes_se <- std.error(sapply(q, function(x) grepl("\\byes\\b", x, ignore.case = TRUE) & nchar(x) > 5), na.rm = T)
more_than_no_mean <- mean(sapply(q, function(x) grepl("\\bno\\b", x, ignore.case = TRUE) & !grepl("\\byes\\b", x, ignore.case = TRUE) & nchar(x) > 5), na.rm = T)
more_than_no_se <- std.error(sapply(q, function(x) grepl("\\bno\\b", x, ignore.case = TRUE) & !grepl("\\byes\\b", x, ignore.case = TRUE) & nchar(x) > 5), na.rm = T)
other_mean <- mean(sapply(q, function(x) !(grepl("\\bno\\b", x, ignore.case = TRUE) | grepl("\\byes\\b", x, ignore.case = TRUE))), na.rm = T)
other_se <- std.error(sapply(q, function(x) !(grepl("\\bno\\b", x, ignore.case = TRUE) | grepl("\\byes\\b", x, ignore.case = TRUE))), na.rm = T)
output <- c(round(num_characters_mean, 4), paste0("(", round(num_characters_se, 2), ")"),
round(num_word_mean, 4), paste0("(", round(num_word_se, 2), ")"),
round(include_yes_mean, 4), paste0("(", round(include_yes_se, 2), ")"),
round(include_no_mean, 4), paste0("(", round(include_no_se, 2), ")"),
round(only_yes_mean, 4), paste0("(", round(only_yes_se, 2), ")"),
round(only_no_mean, 4), paste0("(", round(only_no_se, 2), ")"),
round(more_than_yes_mean, 4), paste0("(", round(more_than_yes_se, 2), ")"),
round(more_than_no_mean, 4), paste0("(", round(more_than_no_se, 2), ")"),
round(other_mean, 4), paste0("(", round(other_se, 2), ")"))
total_output <- cbind(total_output, output)
}
colnames(total_output) <- c("Control", "Reminder", "Tactics", "Emotion", "Combo", "Overall")
rownames(total_output) <- c("Number of Characters", "",
"Number of Words", "",
"Yes Responses", "",
"No Responses", "",
"Only 'Yes' Responses", "",
"Only 'No' Responses", "",
"More than 'Yes' Responses", "",
"More than 'No' Responses", "",
"Other Responses", "")
print(xtable(total_output, type = "latex", caption = paste0("Descriptive Statistics for ", output_name)), file = paste0("./freetext/", ftc, ".latex"))
}

df$num_characters_4 <- nchar(df$reflective_4)
df$num_characters_5 <- nchar(df$reflective_5)
test_4 <- t.test(df$num_characters_4[df$treatment == "emotion"], df$num_characters_4[df$treatment == "control_alternative"], alternative = c("greater"))
test_5 <- t.test(df$num_characters_5[df$treatment == "emotion"], df$num_characters_5[df$treatment == "control_alternative"], alternative = c("greater"))
paste0("diff: ", round(test_4$estimate[1] - test_4$estimate[2], 1), " se: ", round(test_4$stderr, 2))## [1] "diff: 3.3 se: 1.82"
paste0("diff: ", round(test_5$estimate[1] - test_5$estimate[2], 1), " se: ", round(test_5$stderr, 2))## [1] "diff: 2.9 se: 1.91"
Has the Inoculation against Misinformation course changed your behavior on social media? If so, how?
Current heuristic (a response counts as containing course content if it matches any of the following keywords): stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|(tell&difference)|spot|confirm|confirmed|researched|analyzed|before|(tell&between)|ask myself|asked myself
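The keyword pattern above is hard-coded verbatim in several chunks below; one way to keep the copies from drifting apart (a sketch, not how the original pattern was built) is to collapse a single keyword vector into the regex once:

# Sketch: build the course-keyword regex once and reuse it for every reflective question
course_keywords <- c("stop", "think", "first", "check", "evaluate", "identify", "investigate",
                     "analyze", "research", "pause", "question", "verify", "verified", "identified",
                     "prove", "proved", "differentiate", "distinguish", "tell.*difference", "spot",
                     "confirm", "confirmed", "researched", "analyzed", "before", "tell.*between",
                     "ask myself", "asked myself")
course_pattern <- paste(course_keywords, collapse = "|")
# grepl(course_pattern, df$reflective_4) reproduces the hard-coded pattern below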
df_4 <- df[, c("treatment", "reflective_4")]
df_4$contain_course_info <- ifelse(grepl("stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|tell.*difference|spot|confirm|confirmed|researched|analyzed|before|tell.*between|ask myself|asked myself", df_4$reflective_4), 1L, 0L)
df_4 %>% group_by(treatment) %>% summarise(percentage_mentioned = mean(contain_course_info), count_mentioned = sum(contain_course_info), total_in_group = n())

output <- df_4[order(df_4$contain_course_info, decreasing = TRUE), c("treatment", "contain_course_info", "reflective_4")]
colnames(output) <- c("treatment_group", "contain_course_keyword", reflective_questions[4])
write.csv(output, paste0("./freetext/reflective_questions_4_heuristics.csv"))datatable(output[output$contain_course_keyword == 1, 3])datatable(output[output$contain_course_keyword == 0, 3])test_1_q4 <- t.test(output %>% filter(treatment_group %in% c("emotion", "tactics", "combo")) %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("control_alternative")) %>% select(contain_course_keyword),
alternative = "greater")
test_2_q4 <- t.test(output %>% filter(treatment_group %in% c("control_delayed")) %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("control_alternative")) %>% select(contain_course_keyword),
alternative = "greater")
test_3_q4 <- t.test(output %>% filter(treatment_group %in% c("emotion")) %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("control_alternative")) %>% select(contain_course_keyword),
alternative = "greater")
test_4_q4 <- t.test(output %>% filter(treatment_group %in% c("tactics")) %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("control_alternative")) %>% select(contain_course_keyword),
alternative = "greater")
test_5_q4 <- t.test(output %>% filter(treatment_group %in% c("combo")) %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("control_alternative")) %>% select(contain_course_keyword),
alternative = "greater")
results_q4 = data.frame(matrix(NA,5,6))
colnames(results_q4) = c("estimates","std.err","CI_lw","CI_up","ts","p_val")
rownames(results_q4) = c("Test 1 - All Treatment Courses v. Reminder",
"Test 2 - Control (Combo) v. Reminder",
"Test 3 - Emotion v. Reminder",
"Test 4 - Tactics v. Reminder",
"Test 5 - Combo v. Reminder")
results_q4$estimates <- c(test_1_q4$estimate[1] - test_1_q4$estimate[2],
test_2_q4$estimate[1] - test_2_q4$estimate[2],
test_3_q4$estimate[1] - test_3_q4$estimate[2],
test_4_q4$estimate[1] - test_4_q4$estimate[2],
test_5_q4$estimate[1] - test_5_q4$estimate[2])
results_q4$std.err <- c(test_1_q4$stderr,
test_2_q4$stderr,
test_3_q4$stderr,
test_4_q4$stderr,
test_5_q4$stderr)
results_q4$CI_lw <- c(test_1_q4$conf.int[1],
test_2_q4$conf.int[1],
test_3_q4$conf.int[1],
test_4_q4$conf.int[1],
test_5_q4$conf.int[1])
results_q4$CI_up <- c(test_1_q4$conf.int[2],
test_2_q4$conf.int[2],
test_3_q4$conf.int[2],
test_4_q4$conf.int[2],
test_5_q4$conf.int[2])
results_q4$ts <- c(test_1_q4$statistic,
test_2_q4$statistic,
test_3_q4$statistic,
test_4_q4$statistic,
test_5_q4$statistic)
results_q4$p_val <- c(test_1_q4$p.value,
test_2_q4$p.value,
test_3_q4$p.value,
test_4_q4$p.value,
test_5_q4$p.value)
results_q4$p_val_holm = p.adjust(results_q4$p_val,"holm")
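The same six-column table is assembled by hand again for Question 5 and for the gender splits further down; a helper like this (a sketch, assuming each element of `tests` is the htest object returned by t.test) could replace that boilerplate:

# Sketch: collect a list of t.test results into the estimates/std.err/CI/ts/p_val layout used here
collect_tests <- function(tests, labels){
  out <- data.frame(
    estimates = sapply(tests, function(tt) unname(tt$estimate[1] - tt$estimate[2])),
    std.err   = sapply(tests, function(tt) tt$stderr),
    CI_lw     = sapply(tests, function(tt) tt$conf.int[1]),
    CI_up     = sapply(tests, function(tt) tt$conf.int[2]),
    ts        = sapply(tests, function(tt) unname(tt$statistic)),
    p_val     = sapply(tests, function(tt) tt$p.value))
  out$p_val_holm <- p.adjust(out$p_val, "holm")
  rownames(out) <- labels
  out
}
# e.g. collect_tests(list(test_1_q4, test_2_q4, test_3_q4, test_4_q4, test_5_q4), rownames(results_q4))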
results_q4

# Bar plot with 95% CI error bars and pairwise-difference annotations. `data` needs
# columns label/mean/sd (the sd column holds standard errors); `tests` supplies the
# estimates and std.err printed over the comparison brackets.
plot_gen = function(data, color, ylab, xlab, y_min, y_max, title, num_size, baseline, tests){
pic = ggplot(data=data,aes(x=label,y=mean))+
theme_bw()+
theme(axis.line.y = element_line(colour="black"),panel.border = element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
legend.title = element_blank(),
legend.position = c(.9,.8),legend.justification = c("right", "bottom"),
legend.key = element_rect(colour = "transparent"),
legend.box.just = "right", legend.text = element_text(size=12), legend.margin = margin(6, 6, 6, 6),
legend.box.background = element_rect( fill="transparent", size=1),legend.background = element_blank()) +
geom_bar(stat='identity',width=0.75,color="black",fill=color) +
geom_errorbar(aes(ymin=mean-1.96*sd, ymax = mean+1.96*sd),width=0.1, size=1.5, position = position_dodge(1))
if (baseline){
pic = pic + geom_text(aes(label=as.character(formatC(mean,digits=3,format="f")),y=mean+sign(mean)*sd),vjust = -2.5,size=num_size)+
geom_text(aes(label=paste("[",as.character(round(mean/mean[2]*100)),"%]",sep=""),y=mean+sign(mean)*sd),vjust = -1,size=num_size-1)}
if (!baseline){
pic = pic + geom_text(aes(label=as.character(formatC(mean,digits=3,format="f")),y=mean+sd),vjust = -1,size=num_size)
}
pic = pic + geom_hline(yintercept = 0)+
geom_segment(aes(x = 2, y = 0.92, xend = 5, yend = 0.92), color = "orange2",size=2)+
geom_segment(aes(x = 2, y = 0.92, xend = 2, yend = 0.72), color = "orange2",size=2)+
geom_segment(aes(x = 5, y = 0.92, xend = 5, yend = 0.60), color = "orange2",size=2)+
geom_segment(aes(x = 2, y = 0.82, xend = 4, yend = 0.82), color = "pink2",size=2)+
geom_segment(aes(x = 2, y = 0.82, xend = 2, yend = 0.72), color = "pink2",size=2)+
geom_segment(aes(x = 4, y = 0.82, xend = 4, yend = 0.56), color = "pink2",size=2)+
geom_segment(aes(x = 2, y = 0.72, xend = 3, yend = 0.72), color = "turquoise2",size=2)+
geom_segment(aes(x = 2, y = 0.72, xend = 2, yend = 0.72), color = "turquoise2",size=2)+
geom_segment(aes(x = 3, y = 0.72, xend = 3, yend = 0.62), color = "turquoise2",size=2)+
geom_segment(aes(x = 1, y = 0.85, xend = 2, yend = 0.85), color = "gray70",size=2)+
geom_segment(aes(x = 1, y = 0.85, xend = 1, yend = 0.68), color = "gray70",size=2)+
geom_segment(aes(x = 2, y = 0.85, xend = 2, yend = 0.68), color = "gray70",size=2)+
ggplot2::annotate("text",x=4.5,y=0.875,label = paste0(tests$estimates[4], "\n(",tests$std.err[4],")"),size=11) +
ggplot2::annotate("text",x=3.5,y=0.775,label = paste0(tests$estimates[3], "\n(",tests$std.err[3],")"),size=11) +
ggplot2::annotate("text",x=2.5,y=0.675,label = paste0(tests$estimates[2], "\n(",tests$std.err[2],")"),size=11) +
ggplot2::annotate("text",x=1.5,y=0.805,label = paste0(tests$estimates[1], "\n(",tests$std.err[1],")"),size=11) +
#annotate("text",x=4.25,y=1.02,label = "Diff:\n",size=11) +
#annotate("text",x=3.25,y=0.92,label = "Diff:\n",size=11) +
#annotate("text",x=2.25,y=0.82,label = "Diff:\n",size=11) +
#annotate("text",x=1.25,y=0.95,label = "Diff:\n",size=11) +
labs(y=ylab,x=xlab)+
theme(axis.text.x = element_text(color = "black", size = 40, angle = 0, hjust = .5, vjust = 5, face = "plain"),
axis.text.y = element_text(color = "black", size = 45, angle = 0, hjust = 0, vjust = .5, face = "plain",
margin=unit(rep(0.5,4),"cm")),
axis.title.x = element_text(color = "black", size = 55, angle = 0, hjust = .5, vjust = 3, face = "bold"),
axis.title.y = element_text(color = "black", size = 60, angle = 90, hjust = .5, vjust = .5, face = "bold"),
axis.ticks.length.y = unit(-0.25,"cm"), axis.ticks.x=element_blank())+
scale_y_continuous(limits = c(y_min,y_max)) +
scale_x_discrete(limits = data$label) +
ggtitle(title) +
theme(plot.title = element_text(face='bold', size=50, hjust=0.5, vjust=0.5))
return(pic)
}

plot_q4_data = data.frame(matrix(NA, 5, 3))
colnames(plot_q4_data) = c("label","mean","sd")
plot_q4_data[,1:2] = aggregate(contain_course_keyword~treatment_group,output,mean)
plot_q4_data[,3] = aggregate(contain_course_keyword~treatment_group,output,std.error)[,2]
plot_q4_data$label[plot_q4_data$label == "control_delayed"] <- "No-course Control"
plot_q4_data$label[plot_q4_data$label == "control_alternative"] <- "Reminder Control"
plot_q4_data$label[plot_q4_data$label == "tactics"] <- "Info"
plot_q4_data$label[plot_q4_data$label == "emotion"] <- "Emotions"
plot_q4_data$label[plot_q4_data$label == "combo"] <- "Combo"
plot_q4_data <- plot_q4_data %>%
slice(match(c("No-course Control", "Reminder Control", "Info", "Emotions", "Combo"), label))
tests_plot_q4 = data.frame(matrix(NA, 4, 2))
tests_plot_q4 = round(results_q4[2:5,c("estimates","std.err")],3)
plot_gen(plot_q4_data, c("gray70", "royalblue3", "turquoise2", "pink2", "orange2"), "Proportion of Participant Responses containing Keywords", "Assignment group", -0.0003, 1, "", 5, TRUE, tests_plot_q4)

vector_wc <- df_4$reflective_4
# Create corpus
docs <- Corpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 1, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
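This clean-corpus-then-plot pipeline recurs for every treatment and gender subgroup below; a wrapper like the following (a sketch, not part of the original analysis) would reduce each block to a single call:

# Sketch: wrap the recurring corpus cleaning, term counting, and wordcloud steps
make_wordcloud <- function(texts, min_freq = 1){
  docs <- Corpus(VectorSource(texts)) %>%
    tm_map(removeNumbers) %>%
    tm_map(removePunctuation) %>%
    tm_map(stripWhitespace) %>%
    tm_map(content_transformer(tolower)) %>%
    tm_map(removeWords, stopwords("english"))
  freqs <- sort(rowSums(as.matrix(TermDocumentMatrix(docs))), decreasing = TRUE)
  wordcloud(words = names(freqs), freq = freqs, min.freq = min_freq, max.words = 200,
            random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
  invisible(data.frame(word = names(freqs), freq = freqs))
}
# make_wordcloud(df_4$reflective_4) reproduces the cloud above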
# dataset_names <- list('Contain' = output[output$contain_course_keyword == 1, ],
#                       'Does not contain' = output[output$contain_course_keyword == 0, ])
# openxlsx::write.xlsx(dataset_names, paste0("./freetext/reflective_questions_4_heuristics.xlsx"))If you were to tell a friend what you learned in the course, what tip would you share?
Current heuristic (a response counts as containing course content if it matches any of the following keywords): stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|(tell&difference)|spot|confirm|confirmed|researched|analyzed|before|(tell&between)|ask myself|asked myself
df_5 <- df[, c("treatment", "reflective_5")]
df_5$contain_course_info <- ifelse(grepl("stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|tell.*difference|spot|confirm|confirmed|researched|analyzed|before|tell.*between|ask myself|asked myself", df_5$reflective_5), 1L, 0L)
df_5 %>% group_by(treatment) %>% summarise(percentage_mentioned = mean(contain_course_info), count_mentioned = sum(contain_course_info), total_in_group = n())

output <- df_5[order(df_5$contain_course_info, decreasing = TRUE), c("treatment", "contain_course_info", "reflective_5")]
colnames(output) <- c("treatment_group", "contain_course_keyword", reflective_questions[5])
# write.csv(output, paste0("./freetext/reflective_questions_5_heuristics.csv"))
# library(openxlsx)
# dataset_names <- list('Contain' = output[output$contain_course_keyword == 1, ],
# 'Does not contain' = output[output$contain_course_keyword == 0, ])
# openxlsx::write.xlsx(dataset_names, paste0("./freetext/reflective_questions_5_heuristics.xlsx"))datatable(output[output$contain_course_keyword == 1, 3])datatable(output[output$contain_course_keyword == 0, 3])test_1_q5 <- t.test(output %>% filter(treatment_group %in% c("emotion", "tactics", "combo")) %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("control_alternative")) %>% select(contain_course_keyword),
alternative = "greater")
test_2_q5 <- t.test(output %>% filter(treatment_group %in% c("control_delayed")) %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("control_alternative")) %>% select(contain_course_keyword),
alternative = "greater")
test_3_q5 <- t.test(output %>% filter(treatment_group %in% c("emotion")) %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("control_alternative")) %>% select(contain_course_keyword),
alternative = "greater")
test_4_q5 <- t.test(output %>% filter(treatment_group %in% c("tactics")) %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("control_alternative")) %>% select(contain_course_keyword),
alternative = "greater")
test_5_q5 <- t.test(output %>% filter(treatment_group %in% c("combo")) %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("control_alternative")) %>% select(contain_course_keyword),
alternative = "greater")
results_q5 = data.frame(matrix(NA,5,6))
colnames(results_q5) = c("estimates","std.err","CI_lw","CI_up","ts","p_val")
rownames(results_q5) = c("Test 1 - All Treatment Courses v. Reminder",
"Test 2 - Control (Combo) v. Reminder",
"Test 3 - Emotion v. Reminder",
"Test 4 - Tactics v. Reminder",
"Test 5 - Combo v. Reminder")
results_q5$estimates <- c(test_1_q5$estimate[1] - test_1_q5$estimate[2],
test_2_q5$estimate[1] - test_2_q5$estimate[2],
test_3_q5$estimate[1] - test_3_q5$estimate[2],
test_4_q5$estimate[1] - test_4_q5$estimate[2],
test_5_q5$estimate[1] - test_5_q5$estimate[2])
results_q5$std.err <- c(test_1_q5$stderr,
test_2_q5$stderr,
test_3_q5$stderr,
test_4_q5$stderr,
test_5_q5$stderr)
results_q5$CI_lw <- c(test_1_q5$conf.int[1],
test_2_q5$conf.int[1],
test_3_q5$conf.int[1],
test_4_q5$conf.int[1],
test_5_q5$conf.int[1])
results_q5$CI_up <- c(test_1_q5$conf.int[2],
test_2_q5$conf.int[2],
test_3_q5$conf.int[2],
test_4_q5$conf.int[2],
test_5_q5$conf.int[2])
results_q5$ts <- c(test_1_q5$statistic,
test_2_q5$statistic,
test_3_q5$statistic,
test_4_q5$statistic,
test_5_q5$statistic)
results_q5$p_val <- c(test_1_q5$p.value,
test_2_q5$p.value,
test_3_q5$p.value,
test_4_q5$p.value,
test_5_q5$p.value)
results_q5$p_val_holm = p.adjust(results_q5$p_val,"holm")
results_q5

plot_q5_data = data.frame(matrix(NA, 5, 3))
colnames(plot_q5_data) = c("label","mean","sd")
plot_q5_data[,1:2] = aggregate(contain_course_keyword~treatment_group,output,mean)
plot_q5_data[,3] = aggregate(contain_course_keyword~treatment_group,output,std.error)[,2]
plot_q5_data$label[plot_q5_data$label == "control_delayed"] <- "No-course Control"
plot_q5_data$label[plot_q5_data$label == "control_alternative"] <- "Reminder Control"
plot_q5_data$label[plot_q5_data$label == "tactics"] <- "Info"
plot_q5_data$label[plot_q5_data$label == "emotion"] <- "Emotions"
plot_q5_data$label[plot_q5_data$label == "combo"] <- "Combo"
plot_q5_data <- plot_q5_data %>%
slice(match(c("No-course Control", "Reminder Control", "Info", "Emotions", "Combo"), label))
tests_plot_q5 = data.frame(matrix(NA, 4, 2))
tests_plot_q5 = round(results_q5[2:5,c("estimates","std.err")],3)
plot_gen(plot_q5_data, c("gray70", "royalblue3", "turquoise2", "pink2", "orange2"), "Proportion of Participant Responses containing Keywords", "Assignment group", -0.0003, 1, "", 5, TRUE, tests_plot_q5)

vector_wc <- df_5$reflective_5
# Create corpus
docs <- Corpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 1, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))

df_final <- readRDS("df_final.rds")
df_why <- df_final %>% select(user, accuracy, treatment, att_check_pre, att_check_post,
reflective_1, reflective_2, reflective_3, reflective_4, reflective_5,
gender)
df_why <- df_why %>% distinct()

WordCloud with Word Frequency >= 25 (Emotion, Reflective Question 4)
Man
vector_wc <- df_why %>% filter(treatment == "emotion", gender == "Man") %>% select(reflective_4)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| yes | 343 |
| information | 275 |
| na na | 203 |
| na yes | 148 |
| share | 110 |
| media | 105 |
| social | 102 |
| social media | 100 |
| post | 97 |
| misinformation | 90 |
| sharing | 73 |
| posts | 72 |
| now | 66 |
| misleading | 65 |
| changed | 63 |
| dont | 61 |
| can | 55 |
| know | 51 |
| true | 49 |
| careful | 42 |
Woman
vector_wc <- df_why %>% filter(treatment == "emotion", gender == "Woman") %>% select(reflective_4)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| yes | 207 |
| information | 158 |
| na na | 103 |
| social | 85 |
| media | 84 |
| social media | 81 |
| na yes | 74 |
| post | 73 |
| share | 73 |
| dont | 54 |
| sharing | 51 |
| misinformation | 47 |
| true | 37 |
| believe | 33 |
| misleading | 33 |
| posts | 33 |
| changed | 32 |
| know | 31 |
| now | 30 |
| sure | 26 |
WordCloud with Word Frequency >= 25 (Tactics, Reflective Question 4)
Man
vector_wc <- df_why %>% filter(treatment == "tactics", gender == "Man") %>% select(reflective_4)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| yes | 356 |
| information | 289 |
| na na | 164 |
| na yes | 133 |
| social | 130 |
| media | 128 |
| social media | 125 |
| post | 84 |
| posts | 80 |
| now | 77 |
| share | 76 |
| misinformation | 74 |
| sharing | 69 |
| misleading | 68 |
| can | 59 |
| keen | 56 |
| dont | 54 |
| changed | 52 |
| careful | 49 |
| able | 44 |
Woman
vector_wc <- df_why %>% filter(treatment == "tactics", gender == "Woman") %>% select(reflective_4)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| yes | 183 |
| information | 133 |
| na na | 95 |
| post | 66 |
| na yes | 63 |
| media | 60 |
| social | 59 |
| social media | 58 |
| dont | 44 |
| share | 41 |
| misleading | 40 |
| misinformation | 38 |
| posts | 37 |
| now | 34 |
| true | 30 |
| know | 28 |
| sharing | 27 |
| sure | 27 |
| changed | 26 |
| yesi | 26 |
WordCloud with Word Frequency >= 25 (Combo, Reflective Question 4)
Man
vector_wc <- df_why %>% filter(treatment == "combo", gender == "Man") %>% select(reflective_4)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| yes | 385 |
| information | 260 |
| na na | 167 |
| na yes | 160 |
| post | 127 |
| media | 120 |
| social | 120 |
| social media | 117 |
| share | 107 |
| misleading | 91 |
| posts | 90 |
| sharing | 78 |
| dont | 74 |
| now | 71 |
| misinformation | 64 |
| can | 53 |
| changed | 49 |
| misleading information | 46 |
| made | 42 |
| research | 38 |
Woman
vector_wc <- df_why %>% filter(treatment == "combo", gender == "Woman") %>% select(reflective_4)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| yes | 177 |
| information | 144 |
| na na | 95 |
| media | 69 |
| na yes | 69 |
| social | 69 |
| social media | 66 |
| share | 64 |
| post | 56 |
| dont | 55 |
| misleading | 43 |
| sharing | 40 |
| misinformation | 39 |
| now | 38 |
| posts | 37 |
| sure | 33 |
| keen | 30 |
| see | 29 |
| believe | 26 |
| yesi | 26 |
WordCloud with Word Frequency >= 25 (Emotion, Reflective Question 5)
Man
vector_wc <- df_why %>% filter(treatment == "emotion", gender == "Man") %>% select(reflective_5)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| information | 347 |
| na na | 189 |
| misinformation | 188 |
| media | 184 |
| social | 181 |
| social media | 173 |
| sharing | 120 |
| misleading | 117 |
| share | 116 |
| post | 100 |
| always | 93 |
| misleading information | 56 |
| posts | 56 |
| true | 52 |
| careful | 48 |
| avoid | 45 |
| sure | 44 |
| disinformation | 41 |
| dont | 40 |
| misinformation na | 38 |
Woman
vector_wc <- df_why %>% filter(treatment == "emotion", gender == "Woman") %>% select(reflective_5)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| information | 182 |
| media | 137 |
| social | 135 |
| social media | 133 |
| misinformation | 103 |
| na na | 103 |
| share | 80 |
| sharing | 67 |
| misleading | 59 |
| post | 59 |
| true | 47 |
| posts | 43 |
| always | 41 |
| careful | 31 |
| believe | 30 |
| sure | 30 |
| misleading information | 29 |
| dont | 28 |
| never | 28 |
| see | 28 |
WordCloud with Word Frequency >= 25 (Tactics, Reflective Question 5)
Man
vector_wc <- df_why %>% filter(treatment == "tactics", gender == "Man") %>% select(reflective_5)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| information | 358 |
| social | 194 |
| media | 192 |
| social media | 186 |
| misinformation | 161 |
| na na | 158 |
| misleading | 131 |
| share | 98 |
| sharing | 88 |
| misleading information | 77 |
| always | 76 |
| post | 70 |
| posts | 63 |
| true | 60 |
| careful | 50 |
| check | 47 |
| people | 41 |
| source | 41 |
| keen | 38 |
| avoid | 36 |
Woman
vector_wc <- df_why %>% filter(treatment == "tactics", gender == "Woman") %>% select(reflective_5)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| information | 144 |
| media | 113 |
| social | 113 |
| social media | 110 |
| misinformation | 94 |
| na na | 94 |
| misleading | 73 |
| post | 49 |
| share | 43 |
| posts | 41 |
| true | 38 |
| sharing | 34 |
| always | 33 |
| see | 31 |
| misleading information | 30 |
| careful | 28 |
| everything | 28 |
| keen | 28 |
| tell | 28 |
| sure | 25 |
WordCloud with Word Frequency >= 25 (Combo, Reflective Question 5)
Man
vector_wc <- df_why %>% filter(treatment == "combo", gender == "Man") %>% select(reflective_5)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| information | 344 |
| media | 184 |
| social | 181 |
| social media | 175 |
| misinformation | 160 |
| na na | 157 |
| misleading | 128 |
| share | 128 |
| post | 104 |
| sharing | 83 |
| posts | 76 |
| always | 75 |
| misleading information | 70 |
| careful | 54 |
| dont | 49 |
| true | 43 |
| check | 41 |
| sure | 39 |
| tell | 35 |
| see | 34 |
Woman
vector_wc <- df_why %>% filter(treatment == "combo", gender == "Woman") %>% select(reflective_5)
# Create corpus
docs <- VCorpus(VectorSource(vector_wc))
# Clean corpus
docs <-
docs %>%
tm_map(removeNumbers) %>%
tm_map(removePunctuation) %>%
tm_map(stripWhitespace) %>%
tm_map(content_transformer(tolower)) %>%
tm_map(removeWords, stopwords("english"))
BigramTokenizer <- function(x) c(unlist(lapply(ngrams(words(x), 1), paste, collapse = " "), use.names = FALSE),
unlist(lapply(ngrams(words(x), 2), paste, collapse = " "), use.names = FALSE))
# Create doc-term matrix
matrix <- as.matrix(TermDocumentMatrix(docs, control = list(tokenize = BigramTokenizer)))
words <- sort(rowSums(matrix), decreasing = TRUE)
df_freetext <- data.frame(word = names(words), freq = words)
# Create wordcloud
wordcloud(words = df_freetext$word, freq = df_freetext$freq, min.freq = 25, max.words = 200, random.order = FALSE, rot.per = 0.35, colors = brewer.pal(8, "Dark2"))
row.names(df_freetext) <- 1:nrow(df_freetext)
as.matrix(df_freetext[1:20,]) %>% kable(format = 'pipe') %>% print()

| word | freq |
|---|---|
| information | 162 |
| social | 99 |
| media | 96 |
| misinformation | 96 |
| social media | 92 |
| na na | 88 |
| share | 73 |
| misleading | 60 |
| sharing | 57 |
| always | 54 |
| post | 45 |
| posts | 44 |
| true | 41 |
| careful | 30 |
| sure | 30 |
| misleading information | 28 |
| people | 26 |
| dont | 24 |
| avoid | 23 |
| know | 23 |
Current heuristic (a response counts as containing course content if it matches any of the following keywords): stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|(tell&difference)|spot|confirm|confirmed|researched|analyzed|before|(tell&between)|ask myself|asked myself
reflective_questions <- c("What are some techniques that people use to create misleading social media posts?",
"When browsing your timeline in the last month, did you notice any post that looked misleading? If so, what made it seem misleading?",
"How did you feel when you saw the misleading post? If you haven't seen any misleading posts recently, how do you think you would feel?",
"Has the Inoculation against Misinformation course changed your behavior on social media? If so, how?",
"If you were to tell a friend what you learned in the course, what tip would you share?")df_4 <- df_why[df_why$gender != "Other", c("treatment", "gender", "reflective_4")]
df_4$contain_course_info <- ifelse(grepl("stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|tell.*difference|spot|confirm|confirmed|researched|analyzed|before|tell.*between|ask myself|asked myself", df_4$reflective_4), 1L, 0L)
df_4 %>% group_by(treatment, gender) %>% summarise(percentage_mentioned = mean(contain_course_info), count_mentioned = sum(contain_course_info), total_in_group = n()) %>% filter(treatment %in% c("combo", "emotion", "tactics"))

output <- df_4[order(df_4$contain_course_info, decreasing = TRUE), c("treatment", "gender", "contain_course_info", "reflective_4")]
colnames(output) <- c("treatment_group", "gender", "contain_course_keyword", reflective_questions[4])test_1_q4 <- t.test(output %>% filter(treatment_group %in% c("emotion") & gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("emotion") & gender == "Woman") %>% select(contain_course_keyword),
alternative = "greater")
test_2_q4 <- t.test(output %>% filter(treatment_group %in% c("tactics"), gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("tactics"), gender == "Woman") %>% select(contain_course_keyword),
alternative = "greater")
test_3_q4 <- t.test(output %>% filter(treatment_group %in% c("combo"), gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("combo"), gender == "Woman") %>% select(contain_course_keyword),
alternative = "greater")
results_q4 = data.frame(matrix(NA, 3, 6))
colnames(results_q4) = c("estimates","std.err","CI_lw","CI_up","ts","p_val")
rownames(results_q4) = c("Test 1 - Man vs Woman (Emotion)",
"Test 2 - Man vs Woman (Reasoning)",
"Test 3 - Man vs Woman (Combo)")
results_q4$estimates <- c(test_1_q4$estimate[1] - test_1_q4$estimate[2],
test_2_q4$estimate[1] - test_2_q4$estimate[2],
test_3_q4$estimate[1] - test_3_q4$estimate[2])
results_q4$std.err <- c(test_1_q4$stderr,
test_2_q4$stderr,
test_3_q4$stderr)
results_q4$CI_lw <- c(test_1_q4$conf.int[1],
test_2_q4$conf.int[1],
test_3_q4$conf.int[1])
results_q4$CI_up <- c(test_1_q4$conf.int[2],
test_2_q4$conf.int[2],
test_3_q4$conf.int[2])
results_q4$ts <- c(test_1_q4$statistic,
test_2_q4$statistic,
test_3_q4$statistic)
results_q4$p_val <- c(test_1_q4$p.value,
test_2_q4$p.value,
test_3_q4$p.value)
results_q4$p_val_holm = p.adjust(results_q4$p_val,"holm")
results_q4

df_5 <- df_why[df_why$gender != "Other", c("treatment", "gender", "reflective_5")]
df_5$contain_course_info <- ifelse(grepl("stop|think|first|check|evaluate|identify|investigate|analyze|research|pause|question|verify|verified|identified|prove|proved|differentiate|distinguish|tell.*difference|spot|confirm|confirmed|researched|analyzed|before|tell.*between|ask myself|asked myself", df_5$reflective_5), 1L, 0L)
df_5 %>% group_by(treatment, gender) %>% summarise(percentage_mentioned = mean(contain_course_info), count_mentioned = sum(contain_course_info), total_in_group = n()) %>% filter(treatment %in% c("combo", "emotion", "tactics"))

output <- df_5[order(df_5$contain_course_info, decreasing = TRUE), c("treatment", "gender", "contain_course_info", "reflective_5")]
colnames(output) <- c("treatment_group", "gender", "contain_course_keyword", reflective_questions[5])test_1_q5 <- t.test(output %>% filter(treatment_group %in% c("emotion") & gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("emotion") & gender == "Woman") %>% select(contain_course_keyword),
alternative = "greater")
test_2_q5 <- t.test(output %>% filter(treatment_group %in% c("tactics"), gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("tactics"), gender == "Woman") %>% select(contain_course_keyword),
alternative = "greater")
test_3_q5 <- t.test(output %>% filter(treatment_group %in% c("combo"), gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("combo"), gender == "Woman") %>% select(contain_course_keyword),
alternative = "greater")
results_q5 = data.frame(matrix(NA, 3, 6))
colnames(results_q5) = c("estimates","std.err","CI_lw","CI_up","ts","p_val")
rownames(results_q5) = c("Test 1 - Man vs Woman (Emotion)",
"Test 2 - Man vs Woman (Reasoning)",
"Test 3 - Man vs Woman (Combo)")
results_q5$estimates <- c(test_1_q5$estimate[1] - test_1_q5$estimate[2],
test_2_q5$estimate[1] - test_2_q5$estimate[2],
test_3_q5$estimate[1] - test_3_q5$estimate[2])
results_q5$std.err <- c(test_1_q5$stderr,
test_2_q5$stderr,
test_3_q5$stderr)
results_q5$CI_lw <- c(test_1_q5$conf.int[1],
test_2_q5$conf.int[1],
test_3_q5$conf.int[1])
results_q5$CI_up <- c(test_1_q5$conf.int[2],
test_2_q5$conf.int[2],
test_3_q5$conf.int[2])
results_q5$ts <- c(test_1_q5$statistic,
test_2_q5$statistic,
test_3_q5$statistic)
results_q5$p_val <- c(test_1_q5$p.value,
test_2_q5$p.value,
test_3_q5$p.value)
results_q5$p_val_holm = p.adjust(results_q5$p_val,"holm")
results_q5

Current heuristic (a response counts as mentioning misinformation if it matches any of the following keywords): misinformation|misleading|disinformation|misinfo|disinfo|accurate|true
df_4 <- df_why[df_why$gender != "Other", c("treatment", "gender", "reflective_4")]
df_4$contain_misinfo_keyword <- ifelse(grepl("misinformation|misleading|disinformation|misinfo|disinfo|accurate|true", df_4$reflective_4), 1L, 0L)
df_4 %>% group_by(treatment, gender) %>% summarise(percentage_mentioned = mean(contain_misinfo_keyword), count_mentioned = sum(contain_misinfo_keyword), total_in_group = n()) %>% filter(treatment %in% c("combo", "emotion", "tactics"))

output <- df_4[order(df_4$contain_misinfo_keyword, decreasing = TRUE), c("treatment", "gender", "contain_misinfo_keyword", "reflective_4")]
colnames(output) <- c("treatment_group", "gender", "contain_course_keyword", reflective_questions[4])test_1_q4 <- t.test(output %>% filter(treatment_group %in% c("emotion") & gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("emotion") & gender == "Woman") %>% select(contain_course_keyword),
alternative = "less")
test_2_q4 <- t.test(output %>% filter(treatment_group %in% c("tactics"), gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("tactics"), gender == "Woman") %>% select(contain_course_keyword),
alternative = "less")
test_3_q4 <- t.test(output %>% filter(treatment_group %in% c("combo"), gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("combo"), gender == "Woman") %>% select(contain_course_keyword),
alternative = "less")
results_q4 = data.frame(matrix(NA, 3, 6))
colnames(results_q4) = c("estimates","std.err","CI_lw","CI_up","ts","p_val")
rownames(results_q4) = c("Test 1 - Man vs Woman (Emotion)",
"Test 2 - Man vs Woman (Reasoning)",
"Test 3 - Man vs Woman (Combo)")
results_q4$estimates <- c(test_1_q4$estimate[1] - test_1_q4$estimate[2],
test_2_q4$estimate[1] - test_2_q4$estimate[2],
test_3_q4$estimate[1] - test_3_q4$estimate[2])
results_q4$std.err <- c(test_1_q4$stderr,
test_2_q4$stderr,
test_3_q4$stderr)
results_q4$CI_lw <- c(test_1_q4$conf.int[1],
test_2_q4$conf.int[1],
test_3_q4$conf.int[1])
results_q4$CI_up <- c(test_1_q4$conf.int[2],
test_2_q4$conf.int[2],
test_3_q4$conf.int[2])
results_q4$ts <- c(test_1_q4$statistic,
test_2_q4$statistic,
test_3_q4$statistic)
results_q4$p_val <- c(test_1_q4$p.value,
test_2_q4$p.value,
test_3_q4$p.value)
results_q4$p_val_holm = p.adjust(results_q4$p_val,"holm")
results_q4

df_5 <- df_why[df_why$gender != "Other", c("treatment", "gender", "reflective_5")]
df_5$contain_misinfo_keyword <- ifelse(grepl("misinformation|misleading|disinformation|misinfo|disinfo|accurate|true", df_5$reflective_5), 1L, 0L)
df_5 %>% group_by(treatment, gender) %>% summarise(percentage_mentioned = mean(contain_misinfo_keyword), count_mentioned = sum(contain_misinfo_keyword), total_in_group = n()) %>% filter(treatment %in% c("combo", "emotion", "tactics"))

output <- df_5[order(df_5$contain_misinfo_keyword, decreasing = TRUE), c("treatment", "gender", "contain_misinfo_keyword", "reflective_5")]
colnames(output) <- c("treatment_group", "gender", "contain_course_keyword", reflective_questions[5])test_1_q5 <- t.test(output %>% filter(treatment_group %in% c("emotion") & gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("emotion") & gender == "Woman") %>% select(contain_course_keyword),
alternative = "less")
test_2_q5 <- t.test(output %>% filter(treatment_group %in% c("tactics"), gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("tactics"), gender == "Woman") %>% select(contain_course_keyword),
alternative = "less")
test_3_q5 <- t.test(output %>% filter(treatment_group %in% c("combo"), gender == "Man") %>% select(contain_course_keyword),
output %>% filter(treatment_group %in% c("combo"), gender == "Woman") %>% select(contain_course_keyword),
alternative = "less")
results_q5 = data.frame(matrix(NA, 3, 6))
colnames(results_q5) = c("estimates","std.err","CI_lw","CI_up","ts","p_val")
rownames(results_q5) = c("Test 1 - Man vs Woman (Emotion)",
"Test 2 - Man vs Woman (Reasoning)",
"Test 3 - Man vs Woman (Combo)")
results_q5$estimates <- c(test_1_q5$estimate[1] - test_1_q5$estimate[2],
test_2_q5$estimate[1] - test_2_q5$estimate[2],
test_3_q5$estimate[1] - test_3_q5$estimate[2])
results_q5$std.err <- c(test_1_q5$stderr,
test_2_q5$stderr,
test_3_q5$stderr)
results_q5$CI_lw <- c(test_1_q5$conf.int[1],
test_2_q5$conf.int[1],
test_3_q5$conf.int[1])
results_q5$CI_up <- c(test_1_q5$conf.int[2],
test_2_q5$conf.int[2],
test_3_q5$conf.int[2])
results_q5$ts <- c(test_1_q5$statistic,
test_2_q5$statistic,
test_3_q5$statistic)
results_q5$p_val <- c(test_1_q5$p.value,
test_2_q5$p.value,
test_3_q5$p.value)
results_q5$p_val_holm = p.adjust(results_q5$p_val,"holm")
results_q5