Load Packages

# Clear all objects from the global environment before running the analysis.
rm(list = ls())

# Attach every package used by the analysis (pacman installs missing ones).
# The pattern-fill package is spelled "ggpattern"; the previous spelling
# "ggpatern" does not exist on CRAN and made p_load() warn on every run.
pacman::p_load(
  DT, estimatr, kableExtra, readr, reshape2, tidyverse, xtable, dataMaid,
  ggcorrplot, ggmap, rpart, rpart.plot, pollster, wordcloud, tm, RColorBrewer,
  hrbrthemes, janitor, purrr, gridExtra, cowplot, rcompanion, zoo, data.table,
  stats, stargazer, ggpattern, htmlTable
)

# Fix the RNG seed and make sure the output folders exist
# (showWarnings = FALSE keeps re-runs quiet when they already do).
set.seed(94305)
dir.create(file.path("tables"), showWarnings = FALSE)
dir.create(file.path("figures"), showWarnings = FALSE)

Data

# Read the analysis data set and relabel / re-type the columns used below.
main_data <- readRDS("df_final.rds")
# main_data <- main_data[main_data$att_check == "1", ]

# Coerce the user id and the post position to integers
# (order goes through as.character() first).
main_data$user <- as.integer(main_data$user)
main_data$order <- as.integer(as.character(main_data$order))

# Human-readable labels for the post type.
main_data$type <- recode(
  main_data$type,
  "base" = "Base posts",
  "tactics" = "Info posts",
  "emotion" = "Emotions posts",
  "combo" = "Combo posts"
)

# share becomes a 0/1 indicator: 1 when the answer was "Yes".
main_data$share <- as.numeric(main_data$share == "Yes")

# Label the two values of the accuracy column.
main_data$accuracy <- recode(
  main_data$accuracy,
  "end" = "No nudge",
  "inter" = "Nudge"
)

# Treatment arm labels, stored as a factor with a fixed level order.
main_data$treatment <- recode(
  main_data$treatment,
  "reminder" = "Facts baseline",
  "control" = "No-course baseline",
  "tactics" = "Info",
  "emotion" = "Emotions",
  "combo" = "Combo"
)
main_data$treatment <- factor(
  main_data$treatment,
  levels = c("No-course baseline", "Facts baseline", "Info", "Emotions", "Combo")
)

# course: same as treatment, except "No-course baseline" is relabelled "Combo".
main_data$course <- recode(main_data$treatment, "No-course baseline" = "Combo")
main_data$course <- factor(
  main_data$course,
  levels = c("Facts baseline", "Info", "Emotions", "Combo")
)

# Sort rows by user, then by post position within user.
main_data <- main_data[order(main_data$user, main_data$order), ]

# Map the four perceived-accuracy answers to 0..3; any other value is kept
# as its character form and then passed through as.integer().
accuracy_labels <- c(
  "Not at all accurate",
  "Not very accurate",
  "Somewhat accurate",
  "Very accurate"
)
raw_accuracy <- as.character(main_data$perceived_accuracy)
label_position <- match(raw_accuracy, accuracy_labels)
main_data$perceived_accuracy <- as.integer(
  ifelse(is.na(label_position), raw_accuracy, as.character(label_position - 1L))
)
# Mean perceived accuracy per user (and treatment / accuracy condition / survey
# wave), computed over every post whose type is not "Base posts".
family_2_data_final <- aggregate(
  perceived_accuracy ~ user * treatment * accuracy * pre_post,
  main_data[main_data$type != "Base posts", ],
  mean
)

# Same user-level mean of perceived accuracy, separately for the pre and the
# post wave; the post-minus-pre difference is attached to the post table.
family_2_data_final_pre <- aggregate(
  perceived_accuracy ~ user * treatment * accuracy,
  main_data[main_data$type != "Base posts" & main_data$pre_post == "pre", ],
  mean
) # & main_data$treatment %in% c("Info","Emotions","Combo")
family_2_data_final_post <- aggregate(
  perceived_accuracy ~ user * treatment * accuracy,
  main_data[main_data$type != "Base posts" & main_data$pre_post == "post", ],
  mean
) # & main_data$treatment %in% c("Info","Emotions","Combo")

# Pair each post row with the pre row of the same user / treatment / accuracy
# condition instead of relying on the two tables having identical row order.
# aggregate() drops groups whose outcome is entirely missing, so positional
# subtraction could silently misalign or recycle; unmatched rows get NA here
# and are ignored by t.test().
family_2_data_final_post$perceived_accuracy_diff <- local({
  key_cols <- c("user", "treatment", "accuracy")
  post_key <- do.call(paste, c(family_2_data_final_post[key_cols], sep = "|"))
  pre_key <- do.call(paste, c(family_2_data_final_pre[key_cols], sep = "|"))
  family_2_data_final_post$perceived_accuracy -
    family_2_data_final_pre$perceived_accuracy[match(post_key, pre_key)]
})

Tests

Outcome is the pre-post difference in perceived accuracy for misinformation posts.

# One-sided two-sample t-test (t.test() defaults, i.e. unequal variances) on the
# user-level pre/post difference in perceived accuracy.
#   arms_x: treatment labels forming the first sample
#   arms_y: treatment labels forming the second sample
# alternative = "less" tests whether mean(first) - mean(second) is below zero.
compare_accuracy_diff <- function(arms_x, arms_y) {
  outcome <- family_2_data_final_post$perceived_accuracy_diff
  arm <- family_2_data_final_post$treatment
  t.test(outcome[arm %in% arms_x], outcome[arm %in% arms_y], alternative = "less")
}

course_arms <- c("Info", "Emotions", "Combo")

# Pooled courses against each baseline.
test_1 <- compare_accuracy_diff(course_arms, "No-course baseline")
test_2 <- compare_accuracy_diff(course_arms, "Facts baseline")

# Each course against the no-course baseline.
test_3 <- compare_accuracy_diff("Info", "No-course baseline")
test_4 <- compare_accuracy_diff("Emotions", "No-course baseline")
test_5 <- compare_accuracy_diff("Combo", "No-course baseline")

# Each course against the facts baseline.
test_6 <- compare_accuracy_diff("Info", "Facts baseline")
test_7 <- compare_accuracy_diff("Emotions", "Facts baseline")
test_8 <- compare_accuracy_diff("Combo", "Facts baseline")

# The two baselines against each other.
test_9 <- compare_accuracy_diff("Facts baseline", "No-course baseline")
# Collect tests 1-9 into one table: difference in means, standard error,
# confidence bounds, t statistic, raw p-value and Holm-adjusted p-value.
results <- local({
  fitted_tests <- list(
    test_1, test_2, test_3, test_4, test_5, test_6, test_7, test_8, test_9
  )

  # Apply one extractor to every test and return a plain numeric vector.
  pull <- function(extract) {
    vapply(fitted_tests, function(tt) unname(extract(tt)), numeric(1))
  }

  tab <- data.frame(
    estimates = pull(function(tt) tt$estimate[1] - tt$estimate[2]),
    std.err = pull(function(tt) tt$stderr),
    CI_lw = pull(function(tt) tt$conf.int[1]),
    CI_up = pull(function(tt) tt$conf.int[2]),
    ts = pull(function(tt) tt$statistic),
    p_val = pull(function(tt) tt$p.value)
  )

  # Holm correction across the nine tests of this family.
  tab$p_val_holm <- p.adjust(tab$p_val, method = "holm")

  rownames(tab) <- c(
    "Test 1 Accuracy: Treatment Courses v. No-course baseline",
    "Test 2 Accuracy: Treatment Courses v. Facts baseline",
    "Test 3 Accuracy: Info course v. No-course baseline",
    "Test 4 Accuracy: Emotions Courses v. No-course baseline",
    "Test 5 Accuracy: Combo Courses v. No-course baseline",
    "Test 6 Accuracy: Info Courses v. Facts baseline",
    "Test 7 Accuracy: Emotions Courses v. Facts baseline",
    "Test 8 Accuracy: Combo Courses v. Facts baseline",
    "Test 9 Accuracy: Facts baseline v. No-course baseline"
  )
  tab
})

# Render the table rounded to four decimals.
round(results, 4) %>% htmlTable
estimates std.err CI_lw CI_up ts p_val p_val_holm
Test 1 Accuracy: Treatment Courses v. No-course baseline -0.2549 0.019 -Inf -0.2236 -13.4231 0 0
Test 2 Accuracy: Treatment Courses v. Facts baseline -0.1113 0.0193 -Inf -0.0796 -5.7753 0 0
Test 3 Accuracy: Info course v. No-course baseline -0.2254 0.0234 -Inf -0.1869 -9.6233 0 0
Test 4 Accuracy: Emotions Courses v. No-course baseline -0.274 0.0233 -Inf -0.2356 -11.7477 0 0
Test 5 Accuracy: Combo Courses v. No-course baseline -0.2634 0.0235 -Inf -0.2247 -11.1907 0 0
Test 6 Accuracy: Info Courses v. Facts baseline -0.0818 0.0236 -Inf -0.0429 -3.4583 3e-04 3e-04
Test 7 Accuracy: Emotions Courses v. Facts baseline -0.1303 0.0235 -Inf -0.0916 -5.5354 0 0
Test 8 Accuracy: Combo Courses v. Facts baseline -0.1197 0.0238 -Inf -0.0807 -5.0402 0 0
Test 9 Accuracy: Facts baseline v. No-course baseline -0.1436 0.0233 -Inf -0.1053 -6.1635 0 0

By Nudge

# Nudge v. No nudge, one-sided (alternative = "less": Nudge mean below
# No nudge mean), on three user-level outcomes.

# Test 10: mean perceived accuracy in the pre wave.
test_10 <- with(
  family_2_data_final_pre,
  t.test(
    perceived_accuracy[accuracy %in% "Nudge"],
    perceived_accuracy[accuracy %in% "No nudge"],
    alternative = "less"
  )
)

# Test 11: mean perceived accuracy in the post wave.
test_11 <- with(
  family_2_data_final_post,
  t.test(
    perceived_accuracy[accuracy %in% "Nudge"],
    perceived_accuracy[accuracy %in% "No nudge"],
    alternative = "less"
  )
)

# Test 12: post-minus-pre difference in perceived accuracy.
test_12 <- with(
  family_2_data_final_post,
  t.test(
    perceived_accuracy_diff[accuracy %in% "Nudge"],
    perceived_accuracy_diff[accuracy %in% "No nudge"],
    alternative = "less"
  )
)
# Collect tests 10-12 into one table, one row per test: difference in means,
# standard error, confidence bounds, t statistic and raw p-value.
results_2 <- do.call(
  rbind,
  lapply(list(test_10, test_11, test_12), function(tt) {
    data.frame(
      estimates = unname(tt$estimate[1] - tt$estimate[2]),
      std.err = tt$stderr,
      CI_lw = tt$conf.int[1],
      CI_up = tt$conf.int[2],
      ts = unname(tt$statistic),
      p_val = tt$p.value
    )
  })
)

# Holm correction across the three nudge tests.
results_2$p_val_holm <- p.adjust(results_2$p_val, method = "holm")

rownames(results_2) <- c(
  "Test 10 Accuracy: Nudge v. No Nudge - PRE-survey",
  "Test 11 Accuracy: Nudge v. No Nudge - POST-survey",
  "Test 12 Accuracy: Nudge v. No Nudge - PRE-POST difference"
)

# Render the table rounded to four decimals.
round(results_2, 4) %>% htmlTable
estimates std.err CI_lw CI_up ts p_val p_val_holm
Test 10 Accuracy: Nudge v. No Nudge - PRE-survey -0.0549 0.0142 -Inf -0.0316 -3.8687 1e-04 1e-04
Test 11 Accuracy: Nudge v. No Nudge - POST-survey -0.0662 0.0146 -Inf -0.0422 -4.5359 0 0
Test 12 Accuracy: Nudge v. No Nudge - PRE-POST difference -0.0113 0.0151 -Inf 0.0135 -0.7501 0.2266 0.2266

By Sharing within Nudge Group

# Post-level data split by survey wave.
main_data_pre <- main_data %>% dplyr::filter(pre_post == "pre")
main_data_post <- main_data %>% dplyr::filter(pre_post == "post")

# All four tests are two-sided, use post-level (not user-aggregated) perceived
# accuracy, and are restricted to rows whose accuracy condition is "Nudge".

# Test 13: shared v. not shared, pre wave.
test_13 <- t.test(
  with(main_data_pre, perceived_accuracy[accuracy %in% "Nudge" & share == 1]),
  with(main_data_pre, perceived_accuracy[accuracy %in% "Nudge" & share == 0]),
  alternative = "two.sided"
)

# Test 14: shared v. not shared, post wave.
test_14 <- t.test(
  with(main_data_post, perceived_accuracy[accuracy %in% "Nudge" & share == 1]),
  with(main_data_post, perceived_accuracy[accuracy %in% "Nudge" & share == 0]),
  alternative = "two.sided"
)

# Tests 15 and 16: post wave v. pre wave (estimate is post minus pre) among
# shared and among not-shared rows. Each sample is selected with the share
# indicator of its OWN data frame. The previous code indexed main_data_pre with
# main_data_post$share, i.e. a mask from a different table whose rows do not
# correspond. Results printed from an earlier run were produced with that mask
# and must be regenerated.
test_15 <- t.test(
  with(main_data_post, perceived_accuracy[accuracy %in% "Nudge" & share == 1]),
  with(main_data_pre, perceived_accuracy[accuracy %in% "Nudge" & share == 1]),
  alternative = "two.sided"
)

test_16 <- t.test(
  with(main_data_post, perceived_accuracy[accuracy %in% "Nudge" & share == 0]),
  with(main_data_pre, perceived_accuracy[accuracy %in% "Nudge" & share == 0]),
  alternative = "two.sided"
)
# Collect tests 13-16 into one table: difference in means, standard error,
# confidence bounds, t statistic, raw p-value and Holm-adjusted p-value.
results_3 <- local({
  stat_names <- c("estimates", "std.err", "CI_lw", "CI_up", "ts", "p_val")

  # The six summary numbers of one t-test, in the order of stat_names.
  one_row <- function(tt) {
    c(
      tt$estimate[1] - tt$estimate[2],
      tt$stderr,
      tt$conf.int[1],
      tt$conf.int[2],
      tt$statistic,
      tt$p.value
    )
  }

  # vapply() returns a 6 x 4 matrix (statistics in rows); transpose it so that
  # each test becomes one row of the data frame.
  stat_matrix <- vapply(
    list(test_13, test_14, test_15, test_16),
    one_row,
    numeric(6)
  )
  tab <- as.data.frame(t(unname(stat_matrix)))
  names(tab) <- stat_names

  # Holm correction across the four tests of this family.
  tab$p_val_holm <- p.adjust(tab$p_val, method = "holm")

  row.names(tab) <- c(
    "Test 13 Accuracy: Share v. No Share - PRE-survey (Nudge Group)",
    "Test 14 Accuracy: Share v. No Share - POST-survey (Nudge Group)",
    "Test 15 Accuracy: PRE v. POST - Share (Nudge Group)",
    "Test 16 Accuracy: PRE v. POST - No Share (Nudge Group)"
  )
  tab
})

# Render the table rounded to four decimals.
round(results_3, 4) %>% htmlTable
estimates std.err CI_lw CI_up ts p_val p_val_holm
Test 13 Accuracy: Share v. No Share - PRE-survey (Nudge Group) 1.2816 0.0092 1.2636 1.2995 139.8931 0 0
Test 14 Accuracy: Share v. No Share - POST-survey (Nudge Group) 1.3685 0.0083 1.3521 1.3849 163.9614 0 0
Test 15 Accuracy: PRE v. POST - Share (Nudge Group) 0.3843 0.0103 0.3641 0.4044 37.3264 0 0
Test 16 Accuracy: PRE v. POST - No Share (Nudge Group) -0.6932 0.0093 -0.7113 -0.675 -74.8903 0 0