Goal

  • This script replicates the tables and figures used in the presentation slides for the FB Misinformation Project. The presentation can be found here.

  • The folder that contains all of the figures is located at ~fb_misinfo_interventions/data/presentation

Loading the dataset

  • The raw datasets we used to produce these figures can be found at ~fb_misinfo_interventions/data/chatfuel/processed

Slide 8

Phase 1 results: differential attrition

se_cont = function(x, na.rm=FALSE) {
  if (na.rm) x <- na.omit(x)
  sqrt(var(x)/length(x))}

se_binary = function(x, na.rm=FALSE) {
  if (na.rm) x <- na.omit(x)
  sqrt(mean(x)*(1-mean(x))/length(x))}

# add parentheses
add_parentheses <- function(x) {
  if (!is.na(x) && x!="" && !is.na(as.numeric(x))) {
    formatted_x <- formatC(as.numeric(x), format = "f", digits = 4)
    formatted_x[formatted_x != "NA"] <- paste0("(", formatted_x[formatted_x != "NA"], ")")
    formatted_x
  } else {
    x
  }
}
df_table <- data_main %>% 
  select(analytic_id, consent_coded_num, phase_coded, arm_coded, total_duration, intervention_duration, misinfo_quiz_duration, consent_coded_num, quiz_completed_coded, attention_check_passed) %>% mutate(completed_intervention = ifelse(intervention_duration > 0, 1, 0), 
                                                                                                                                                                                                     completedquiz_consent = quiz_completed_coded / consent_coded_num)

df_table_phase1 <- df_table %>% filter(phase_coded == "Phase 1")


consented_1 <- data.frame("# Consented" = c(
  length(unique(df_table_phase1$analytic_id[df_table_phase1$consent_coded_num == 1])),
  length(unique(df_table_phase1$analytic_id[df_table_phase1$arm_coded=="Original Baseline" & df_table_phase1$consent_coded_num == 1])),
  length(unique(df_table_phase1$analytic_id[df_table_phase1$arm_coded=="SMS" & df_table_phase1$consent_coded_num == 1])),
  length(unique(df_table_phase1$analytic_id[df_table_phase1$arm_coded=="Video" & df_table_phase1$consent_coded_num == 1])),
  length(unique(df_table_phase1$analytic_id[df_table_phase1$arm_coded=="Game" & df_table_phase1$consent_coded_num == 1]))
  ))

completed_1 <- data.frame("# Completed Intervention" = c(
    sum(!is.na(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="Original Baseline"]))+sum(!is.na(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="SMS"])) + sum(!is.na(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="Video"])) + sum(!is.na(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="Game"])),
    sum(!is.na(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="Original Baseline"])),
    sum(!is.na(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="SMS"])),
    sum(!is.na(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="Video"])),
    sum(!is.na(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="Game"]))
    ))


completed_quiz_1 <- data.frame("# Completed Quiz" = c(
    sum(df_table_phase1$quiz_completed_coded[df_table_phase1$arm_coded=="Original Baseline"])+sum(df_table_phase1$quiz_completed_coded[df_table_phase1$arm_coded=="SMS"]) + sum(df_table_phase1$quiz_completed_coded[df_table_phase1$arm_coded=="Video"]) + sum(df_table_phase1$quiz_completed_coded[df_table_phase1$arm_coded=="Game"]),
    sum(df_table_phase1$quiz_completed_coded[df_table_phase1$arm_coded=="Original Baseline"]),
    sum(df_table_phase1$quiz_completed_coded[df_table_phase1$arm_coded=="SMS"]),
    sum(df_table_phase1$quiz_completed_coded[df_table_phase1$arm_coded=="Video"]),
    sum(df_table_phase1$quiz_completed_coded[df_table_phase1$arm_coded=="Game"])
    ))

completedquiz_consent_1 <-data.frame("#Completed / Consent" = completed_quiz_1/consented_1)


median_intervention_1 <- data.frame("median time" = c(
    median(df_table_phase1$intervention_duration, na.rm = TRUE),
    median(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="Original Baseline"], na.rm = TRUE),
    median(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="SMS"], na.rm = TRUE),
    median(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="Video"], na.rm = TRUE),
    median(df_table_phase1$intervention_duration[df_table_phase1$arm_coded=="Game"], na.rm = TRUE)
    ))

# Calculate attention pass rate 

attention_rate <- data.frame("Attention Check Pass Rate" = c(
    mean(df_table_phase1$attention_check_passed, na.rm = TRUE),
    mean(df_table_phase1$attention_check_passed[df_table_phase1$arm_coded=="Original Baseline"], na.rm = TRUE),
    mean(df_table_phase1$attention_check_passed[df_table_phase1$arm_coded=="SMS"], na.rm = TRUE),
    mean(df_table_phase1$attention_check_passed[df_table_phase1$arm_coded=="Video"], na.rm = TRUE),
    mean(df_table_phase1$attention_check_passed[df_table_phase1$arm_coded=="Game"], na.rm = TRUE)
    ))


#combined_intervention_1 <- data.frame(coalesce(median_intervention,se_intervention))

name <- data.frame("Group" = c("All", "Placebo", "SMS", "Video", "Game"))

df_table_1 <- cbind(name, consented_1, completed_1, completed_quiz_1, completedquiz_consent_1, median_intervention_1, attention_rate)

df_table_1 %>% kable(digits = 3, col.names = c("","Consented", "Completed Intervention", "Completed Survey", "Completed Survey / Consented", "Median Intervention Duration", "Passing Attention Check")) |>
      kable_styling(bootstrap_options = c("striped","hover")) |>
      kableExtra::scroll_box( height = "800px")
Consented Completed Intervention Completed Survey Completed Survey / Consented Median Intervention Duration Passing Attention Check
All 29727 20694 15693 0.528 14.817 0.028
Placebo 7369 6066 4571 0.620 10.792 0.028
SMS 7470 5078 4026 0.539 17.483 0.023
Video 7472 4916 3854 0.516 15.533 0.027
Game 7416 4634 3242 0.437 17.383 0.036
# Saving this figure as csv 

write.csv(df_table_1, "data/presentation/phase1_results.csv")

Slide 10

Funnel Statistics

df_funnel <- data_main %>% 
  select(analytic_id, phase_coded, consent_coded_num, quiz_completed_coded, intervention_duration) %>% 
  mutate(completed_intervention = ifelse(intervention_duration > 0, 1, 0))

enter <- data.frame("enter" = c(40508, 50845, 91353))

consent <- data.frame("consent" = c(
  sum(df_funnel$consent_coded_num[df_funnel$phase_coded=="Phase 1"]),
  sum(df_funnel$consent_coded_num[df_funnel$phase_coded=="Phase 2"]),
  sum(df_funnel$consent_coded_num)
  ))

consent_enter <- data.frame("consent / enter" = consent/enter)


completed_intervention <- data.frame("completed intervention" = c(
  sum(!is.na(df_funnel$completed_intervention[df_funnel$phase_coded=="Phase 1"])),
  sum(!is.na(df_funnel$completed_intervention[df_funnel$phase_coded=="Phase 2"])),
  sum(!is.na(df_funnel$completed_intervention))
  ))

completed_intervention_consent <- data.frame("completed intervention / consent" = completed_intervention/consent)


completed_quiz <- data.frame("completed quiz" = c(sum(df_funnel$quiz_completed_coded[df_funnel$phase_coded=="Phase 1"]), sum(df_funnel$quiz_completed_coded[df_funnel$phase_coded=="Phase 2"]), sum(df_funnel$quiz_completed_coded)))

completed_quiz_consent <- data.frame("Completed quiz/ consent" = (completed_quiz/consent))




name <- data.frame("Group" = c("Phase 1", "Phase 2", "Total"))

df_funnel_display <- cbind(name, enter, consent, consent_enter, completed_intervention, completed_intervention_consent, completed_quiz, completed_quiz_consent)

df_funnel_display %>% kable(digits = 3, col.names = c("Group", "Enter", "Consent", "Consent / Enter", "Completed Intervention", "Completed Intervention / Consent", "Completed Quiz", "Completed Quiz / Consented")) |>
      kable_styling(bootstrap_options = c("striped","hover")) |>
      kableExtra::scroll_box( height = "800px")
Group Enter Consent Consent / Enter Completed Intervention Completed Intervention / Consent Completed Quiz Completed Quiz / Consented
Phase 1 40508 29727 0.734 20694 0.696 15693 0.528
Phase 2 50845 40628 0.799 31374 0.772 27114 0.667
Total 91353 70355 0.770 52068 0.740 42807 0.608
# Saving this figure as csv

write.csv(df_funnel_display, "data/presentation/funnel_statistics.csv")

Proportion of participants in each Country, by Phase

df_table <- data_main %>% select(analytic_id, phase_coded, country_coded, quiz_completed_coded) %>% filter(quiz_completed_coded==1)

df_table_country <- df_table %>% 
  group_by(phase_coded) %>% 
  summarise(ghana = sum(country_coded == "Ghana"), kenya = sum(country_coded == "Kenya"), nigeria = sum(country_coded == "Nigeria"), south_africa = sum(country_coded == "South Africa"), total = ghana + kenya+ nigeria + south_africa,
            perc_ghana = ghana/total, perc_kenya = kenya/total, perc_nigeria = nigeria/total, perc_south_africa = south_africa/total) %>% select(phase_coded, perc_ghana, perc_kenya, perc_nigeria, perc_south_africa)

df_table_country %>% 
  kable(digits = 3, col.names = c("Phase", "Ghana", "Kenya", "Nigeria", "South Africa"))|>
      kable_styling(bootstrap_options = c("striped","hover")) |>
      kableExtra::scroll_box( height = "800px")
Phase Ghana Kenya Nigeria South Africa
Phase 1 0.137 0.128 0.417 0.318
Phase 2 0.102 0.137 0.446 0.315
## Saving this figure as csv

write.csv(df_table_country, "data/presentation/country_proportion.csv")

Slide 14

Attrition and Outcomes, by Payment Amount

df_payment <- data_main %>% select(analytic_id, payment_condition_dollar, intervention_duration, misinfo_quiz_duration, consent_coded_num, quiz_completed_coded, sharing_discernment, mean_share_misinfo, mean_share_nonmisinfo, phase_coded) %>% mutate(completed_intervention = ifelse(intervention_duration > 0, 1, 0), 
                                                                                                                                                                                                     completedquiz_consent = quiz_completed_coded / consent_coded_num) %>% filter(phase_coded=="Phase 2")

consented <- data.frame("# Consented" = c(
  length(unique(df_payment$analytic_id[df_payment$consent_coded_num == 1])),
  
  NA,
    
  length(unique(df_payment$analytic_id[df_payment$payment_condition_dollar==0.5 & df_payment$consent_coded_num == 1])),
  
  NA,
      
  length(unique(df_payment$analytic_id[df_payment$payment_condition_dollar==1 & df_payment$consent_coded_num == 1])),
  
  NA,
      
  length(unique(df_payment$analytic_id[df_payment$payment_condition_dollar==3 & df_payment$consent_coded_num == 1])),
  
  NA))

completed_quiz <- data.frame("# Completed Quiz" = c(
    sum(df_payment$quiz_completed_coded[df_payment$payment_condition_dollar==0.5])+sum(df_payment$quiz_completed_coded[df_payment$payment_condition_dollar==1]) + sum(df_payment$quiz_completed_coded[df_payment$payment_condition_dollar==3]),
    NA,
    sum(df_payment$quiz_completed_coded[df_payment$payment_condition_dollar==0.5]),
    NA,
    sum(df_payment$quiz_completed_coded[df_payment$payment_condition_dollar==1]),
    NA,
    sum(df_payment$quiz_completed_coded[df_payment$payment_condition_dollar==3]),
    NA
    ))

completedquiz_consent <-data.frame("#Completed / Consent" = completed_quiz/consented)

se_completed_consent <- data.frame("# SE Completed / Consent" = c(
    se_cont(df_payment$completedquiz_consent, na.rm = TRUE),
    NA,
    se_cont(df_payment$completedquiz_consent[df_payment$payment_condition_dollar==0.5], na.rm = TRUE),
    NA,
    se_cont(df_payment$completedquiz_consent[df_payment$payment_condition_dollar==1], na.rm = TRUE),
    NA,
    se_cont(df_payment$completedquiz_consent[df_payment$payment_condition_dollar==3], na.rm = TRUE),
    NA
    ))

#completedquiz_consent <- data.frame(coalesce(completedquiz_consent,se_completed_consent))

median_intervention <- c(
    median(df_payment$intervention_duration, na.rm = TRUE),
    NA,
    median(df_payment$intervention_duration[df_payment$payment_condition_dollar==0.5], na.rm = TRUE),
    NA,
    median(df_payment$intervention_duration[df_payment$payment_condition_dollar==1], na.rm = TRUE),
    NA,
    median(df_payment$intervention_duration[df_payment$payment_condition_dollar==3], na.rm = TRUE),
    NA
    )

se_intervention <- c(NA,
    se_cont(df_payment$intervention_duration, na.rm = TRUE),
    NA,
    se_cont(df_payment$intervention_duration[df_payment$payment_condition_dollar==0.5], na.rm = TRUE),
    NA,
    se_cont(df_payment$intervention_duration[df_payment$payment_condition_dollar==1], na.rm = TRUE),
    NA,
    se_cont(df_payment$intervention_duration[df_payment$payment_condition_dollar==3], na.rm = TRUE)
    )

combined_intervention <- data.frame(coalesce(median_intervention,se_intervention))



misinfoshare <- c(mean(df_payment$mean_share_misinfo, na.rm = TRUE) , 
                  NA, 
                  mean(df_payment$mean_share_misinfo[df_payment$payment_condition_dollar==0.5], na.rm = TRUE),
                  NA,
                  mean(df_payment$mean_share_misinfo[df_payment$payment_condition_dollar==1], na.rm = TRUE),
                  NA,
                  mean(df_payment$mean_share_misinfo[df_payment$payment_condition_dollar==3], na.rm = TRUE),
                  NA
                  )

se_misinfo <- c(NA, 
    se_cont(df_payment$mean_share_misinfo, na.rm = TRUE),
    NA,
    se_cont(df_payment$mean_share_misinfo[df_payment$payment_condition_dollar==0.5], na.rm = TRUE),
    NA,
    se_cont(df_payment$mean_share_misinfo[df_payment$payment_condition_dollar==1], na.rm = TRUE),
    NA,
    se_cont(df_payment$mean_share_misinfo[df_payment$payment_condition_dollar==3], na.rm = TRUE)
    )

nonmisinfoshare <- c(mean(df_payment$mean_share_nonmisinfo, na.rm = TRUE) , 
                  NA, 
                  mean(df_payment$mean_share_nonmisinfo[df_payment$payment_condition_dollar==0.5], na.rm = TRUE),
                  NA,
                  mean(df_payment$mean_share_nonmisinfo[df_payment$payment_condition_dollar==1], na.rm = TRUE),
                  NA,
                  mean(df_payment$mean_share_nonmisinfo[df_payment$payment_condition_dollar==3], na.rm = TRUE),
                  NA
                  )
se_nonmisinfo <- c(NA,
    se_cont(df_payment$mean_share_nonmisinfo, na.rm = TRUE),
    NA,
    se_cont(df_payment$mean_share_nonmisinfo[df_payment$payment_condition_dollar==0.5], na.rm = TRUE),
    NA,
    se_cont(df_payment$mean_share_nonmisinfo[df_payment$payment_condition_dollar==1], na.rm = TRUE),
    NA,
    se_cont(df_payment$mean_share_nonmisinfo[df_payment$payment_condition_dollar==3], na.rm = TRUE)
    )

combined_nonmisinfo <- data.frame(coalesce(nonmisinfoshare,se_nonmisinfo))

combined_misinfo <- data.frame(coalesce(misinfoshare,se_misinfo))

sharing_discernment <- c(mean(df_payment$sharing_discernment, na.rm = TRUE) , 
                  NA, 
                  mean(df_payment$sharing_discernment[df_payment$payment_condition_dollar==0.5], na.rm = TRUE),
                  NA,
                  mean(df_payment$sharing_discernment[df_payment$payment_condition_dollar==1], na.rm = TRUE),
                  NA,
                  mean(df_payment$sharing_discernment[df_payment$payment_condition_dollar==3], na.rm = TRUE),
                  NA
                  )
se_discernment <- c(NA,
    se_cont(df_payment$sharing_discernment, na.rm = TRUE),
    NA,
    se_cont(df_payment$sharing_discernment[df_payment$payment_condition_dollar==0.5], na.rm = TRUE),
    NA,
    se_cont(df_payment$sharing_discernment[df_payment$payment_condition_dollar==1], na.rm = TRUE),
    NA,
    se_cont(df_payment$sharing_discernment[df_payment$payment_condition_dollar==3], na.rm = TRUE)
    )

combined_discernment <- data.frame(coalesce(sharing_discernment,se_discernment))

name <- data.frame("Group" = c("All", NA, "$0.50", NA, "$1.00", NA, "$3.00", NA))

df_payment <- cbind(name, consented, completed_quiz, completedquiz_consent, combined_intervention, combined_nonmisinfo, combined_misinfo, combined_discernment)

df_payment %>% kable(digits = 3, col.names = c("", "Consented", "Completed Survey", "Completed Quiz/ Consented", "Median Intervention Duration (SE)", "Nonmisformation Sharing (SE)", "Misinformation Share (SE)", "Sharing Discernment (SE)")) |>
      kable_styling(bootstrap_options = c("striped","hover")) |>
      kableExtra::scroll_box( height = "800px")
Consented Completed Survey Completed Quiz/ Consented Median Intervention Duration (SE) Nonmisformation Sharing (SE) Misinformation Share (SE) Sharing Discernment (SE)
All 40628 27114 0.667 16.167 0.704 0.499 0.205
NA NA NA NA 16.313 0.002 0.002 0.002
$0.50 21531 13541 0.629 16.183 0.699 0.497 0.202
NA NA NA NA 17.411 0.003 0.003 0.004
$1.00 9768 6821 0.698 16.383 0.710 0.501 0.209
NA NA NA NA 38.238 0.004 0.005 0.005
$3.00 9329 6752 0.724 15.933 0.707 0.498 0.209
NA NA NA NA 40.972 0.004 0.005 0.005
# Saving this figure as csv

write.csv(df_payment, "data/presentation/payment_amount.csv")

Slide 7

Sharing Intention: Misinfo, Non-misinfo, and Discernment

plot_gen = function(data, palette, ylab = NULL, xlab = NULL, y_min = 0, y_max = 1, title = NULL, 
                    num_size = NULL, baseline = NULL, filename = NULL, baseline_group = "Original Baseline"){
    
    span = y_max - y_min
    data$vert_bar_max = y_max - 0.03 * span * 1:nrow(data)

    data$vert_bar_max[data$label == baseline_group] = max(data$vert_bar_max)

    pic = ggplot(data=data, aes(x=label,y=mean))+
        theme_bw() +
        theme(axis.line.y = element_line(colour="black"),panel.border = element_blank(), 
              panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        legend.title = element_blank(),      
        legend.position = c(.9,.8),legend.justification = c("right", "bottom"), 
        legend.key = element_rect(colour = "transparent"),
        legend.box.just = "right", legend.text = element_text(size=12), legend.margin = margin(6, 6, 6, 6),
        legend.box.background = element_rect( fill="transparent", size=1),legend.background = element_blank()) +

        scale_fill_manual(values = palette$color, breaks = palette$arm_coded, labels = palette$arm_coded) +
        scale_color_manual(values = palette$color, breaks = palette$arm_coded, labels = palette$arm_coded) +
        
        geom_bar(aes(fill = label), stat='identity',width=0.75,color="black") +
        geom_errorbar(aes(ymin=mean-1.96*se, ymax = mean+1.96*se),width=0.1, linewidth = 0.75, position = position_dodge(1)) 
        
        pic = pic + geom_hline(yintercept = 0) +
                coord_cartesian(ylim = c(y_min,y_max)) +
                #y ticks every 0.25 units
                scale_y_continuous(breaks = seq(y_min, y_max, 0.25)) +
                labs(y=ylab,x=xlab)

        # add a vertical line over each bar
        pic = pic + geom_segment(aes(x = label, xend = label, y = mean + .13 * span, yend = vert_bar_max, color = label), size=num_size/4)

        # add horizontal lines
        pic = pic + geom_segment(aes(x = 1, xend = label, y = vert_bar_max, yend = vert_bar_max, color = label), size=num_size/4) 

        # add the test results
        pic = pic + geom_label(data = data |> filter(!is.na(test_diff)), 
        aes(
            x = label, y = vert_bar_max, label = paste0(formatC(test_diff,digits=3,format='f'), "\n(",formatC(test_diff_se,digits=4,format='f'),")")),
            size = num_size
        )

        # add means and SEs
        if (baseline){
          pic = pic + geom_text(aes(label=formatC(mean,digits=4,format="f"),y=mean+sign(mean)*se),vjust = -2.5,size=num_size)+
          geom_text(aes(label=paste("(",formatC(se,digits=4,format="f"),")",sep=""),y=mean+sign(mean)*se),vjust = -1,size=num_size)
        }    
        if (!baseline){
          pic = pic + geom_text(aes(label=as.character(formatC(mean,digits=3,format="f")),y=mean+se),vjust = -1,size=num_size)    
        }
           

        pic = pic +   
          theme(
                axis.text.x = element_text(color = "black", size = 3 * num_size,  angle = 0, hjust = .5, vjust = 5, face = "plain"),
                axis.text.y = element_text(color = "black", size = 3 * num_size, angle = 0, hjust = 0, vjust = .5, face = "plain",
                                    margin=unit(rep(0.5,4),"cm")),  
                axis.title.x = element_text(color = "black", size = 4 * num_size, angle = 0, hjust = .5, vjust = 3, face = "bold"),
                axis.title.y = element_text(color = "black", size = 4 * num_size, angle = 90, hjust = .5, vjust = .5, face = "bold"),
                axis.ticks.length.y = unit(-0.25,"cm"), axis.ticks.x=element_blank(),
                legend.position = "none"
            )

        # add number of observations
        pic = pic +
          geom_text(aes(label = paste0('italic(N) == ', '"', formatC(n_obs, big.mark = ','), '"'),
                    y = 0, hjust = 0.5, vjust = -0.5), color = 'black', size = num_size, check_overlap = TRUE, parse = TRUE)

        # add caption
        total_n_obs <- sum(data$n_obs)
        pic = pic + labs(caption = paste0("N = ", formatC(total_n_obs, big.mark = ","), "; Difference in Means and SEs are calculated relative to the ", baseline_group, " arm."))
    
        # add title if provided

        if (!is.null(title)){
            pic = pic + ggtitle(title) +
            theme(plot.title = element_text(face='bold', size= 4 * num_size, hjust=0.5, vjust=0.5))
        }

    if(!is.null(filename)){
        ggsave(paste0(filename, ".pdf"), pic, width = 2 * num_size, height = 5, dpi = 300)
        ggsave(paste0(filename, ".png"), pic, width = 2 * num_size, height = 5, dpi = 300)
    } else {
        print(pic)
    }
}
wrap_plot_gen = function(outcome, color, ylab = NULL, xlab = NULL, y_min = NULL, y_max = NULL,
                         title = NULL, num_size = NULL, filename = NULL, data = data_main,
                         baseline_group = "Placebo (Original)"){

    dt = data |>
        filter(quiz_completed) |>
        group_by(arm_coded) |>
        summarize(
            mean = mean({{outcome}}),
            se = sd({{outcome}})/sqrt(n()),
            n_obs = n(),
        ) |>
        mutate(
            label = case_when(
            arm_coded == "SMS" ~ "Emotions Course",
            arm_coded == "Long Baseline" ~ "Placebo (Long)",
            arm_coded == "Original Baseline" ~ "Placebo (Original)",
            TRUE ~ as.character(arm_coded)
        ),
        label = factor(label, levels = c("Placebo (Original)", "Placebo (Long)", "Emotions Course",  "Video", "Game")))
    
    dt = dt |> mutate(
        test_diff = mean - dt$mean[dt$label == baseline_group],
        test_diff_se = sqrt(dt$se^2 + dt$se[dt$label == baseline_group]^2)
    ) |>
    mutate(
        test_diff = ifelse(label == baseline_group, NA, test_diff),
        test_diff_se = ifelse(label == baseline_group, NA, test_diff_se)
    )

    plot_gen(dt,theme_colors, baseline = TRUE, num_size = 4, filename = filename, xlab = xlab, ylab = ylab, y_min = y_min, y_max = y_max, title = title, baseline_group = baseline_group)
}

Sharing Intention: Misinformation

sharing_misinformation = wrap_plot_gen(
    outcome = mean_share_misinfo,
    ylab = "Sharing Intention: Misinformation",
    xlab = "Arm",
    y_min = 0,
    y_max = 1.2,
    num_size = 4,
    data = data_main |> filter(phase_coded == "Phase 1"))

## Save as png and pdf files 

ggsave("data/presentation/share_misinfo.pdf")
ggsave("data/presentation/share_misinfo.png")

Sharing Intention: Non-misinformation

sharing_nonmisinformation = wrap_plot_gen(
    outcome = mean_share_nonmisinfo,
    ylab = "Sharing Intention: Non-misinformation",
    xlab = "Arm",
    y_min = 0,
    y_max = 1.2,
    num_size = 4,
    data = data_main |> filter(phase_coded == "Phase 1"))

## Save as png and pdf files

ggsave("data/presentation/share_nonmisinfo.pdf")
ggsave("data/presentation/share_nonmisinfo.png")

Sharing Intention: Discernment

sharing_discernment = wrap_plot_gen(
    outcome = sharing_discernment,
    ylab = "Sharing Intention: Discernment",
    xlab = "Arm",
    y_min = 0,
    y_max = .4,
    num_size = 4,
    data = data_main |> filter(phase_coded == "Phase 1"))

## Save as png and pdf files

ggsave("data/presentation/share_discernment.pdf")
ggsave("data/presentation/share_discernment.png")

Slide 11

Sharing Compared to Original Placebo

run_sdim = function(
    df, depvar, indepvar, stratavar,
    baselinevar = "Original Baseline",
    controls = NULL, fes = NULL, filename = NULL){
    # Run stratified difference in means

    print(paste0("Running SDIM for ", depvar, " ~ ", indepvar, " | ", stratavar))
    # build formula
    fml = paste0(depvar, " ~ i(", indepvar, ", ref = '", baselinevar, "')")
    if(!is.null(controls)){
        fml = paste0(fml, " + ", paste(controls, collapse = " + "))
    }
    if(!is.null(fes)){
        fml = paste0(fml, " | ", paste(fes, collapse = " + "))
    }

    ests_strata = list()
    for(s in unique(df[[stratavar]])){
        # Get ATEs for each strata
        df_s = df[df[[stratavar]] == s, ]

        if( ( nrow(df_s) < 2) | nrow(df_s[df_s[[indepvar]] == baselinevar, ]) < 2 | nrow(df_s[df_s[[indepvar]] != baselinevar, ]) < 2){
            next
        }

        if(is.null(fes)){
            mod_s = feols(as.formula(fml), data = df_s, vcov = "HC1")
        } else {
            mod_s = felm(as.formula(fml), data = df_s) # this case uses clustered standard errors by default
        }
      
        Coefs = as.data.frame(coeftable(mod_s))[c("Estimate", "Std. Error")] 
        Coefs$varnames = rownames(Coefs)
        Coefs$s = s
        Coefs$N_s = mod_s$nobs
        rownames(Coefs) = NULL

        ests_strata[[s]] = Coefs
      }

    # Combine estimates
    ests = bind_rows(ests_strata) |>
        group_by(varnames) |>
        mutate(w_s = N_s/sum(N_s)) |>
        summarize(
            beta = sum(w_s * Estimate),
            se = sqrt(sum(w_s^2 * `Std. Error`^2)),
            ) |>
        mutate(
            outcome = depvar,
            indepvar = indepvar,
            controls = ifelse(is.null(controls), "No", paste(controls, collapse = ", ")),
            fes = ifelse(is.null(fes), "No", paste(fes, collapse = ", ")),
        )
    
    # add number of observations
    # TODO: need to double check that no stratum was skipped, otherwise this number needs to be updated
    df_n_obs = df |>
      filter(quiz_completed) |>
      group_by(arm_coded) |>
      summarize(n_obs = n()) %>%
      mutate(varnames = paste0("arm_coded::", arm_coded)) %>% 
      select(-arm_coded)
    
    ests = left_join(ests, df_n_obs, by = "varnames")
    
    # Export to csv
    if (!is.null(filename)){
        ests |>
            round_and_write_csv(paste0("results/survey/StratifiedATEs/", filename, ".csv"))
    }

    return(ests)
}


plot_te = function(res, filename = NULL, palette = theme_colors, ylims = NULL, n_obs_position = NULL, ylab = "", xlab = "Arm", caption = "Bars represent 95% confidence intervals."){


  
    plt = res |> ggplot(aes(x = Arm, color = Arm)) +
        geom_point(aes(y = beta), size = 2) +
        geom_errorbar(aes(ymin = beta - 1.96 * se, ymax = beta + 1.96* se), width = .1, linewidth = 1) +
        theme_bw() +
        theme(
            axis.line.y = element_line(colour="black"),
            panel.border = element_blank(), 
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.position = "none") +
        scale_color_manual(values = palette$color, breaks = palette$arm_coded, labels = palette$arm_coded) +
        scale_y_continuous(limits = ylims, expand = c(0.1, 0)) +
        geom_hline(aes(yintercept = 0)) +
        labs(y = ylab, x = xlab, caption = caption) +
        theme(
            plot.title = element_text(size = 20, face = "bold"),
            axis.title = element_text(size = 20),
            axis.text = element_text(size = 20),
            #increase size of caption text
            plot.caption = element_text(size = 18),
            plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"))
    
    # add number of observations
    if (is.null(n_obs_position)) {
      n_obs_position <- min(res$beta - 1.96 * res$se)
    }
    
    plt = plt +
      geom_text(aes(label = paste0('italic(N) == ', '"', formatC(n_obs, big.mark = ','), '"'),
                    y = n_obs_position, hjust = 0.5, vjust = 1.75), color = 'black', size = 6, check_overlap = TRUE, parse = TRUE)

    if (!is.null(filename)){
        ggsave(paste0(filename, ".pdf"), plot = plt, width = 10, height = 7)
        ggsave(paste0(filename, ".png"), plot = plt, width = 10, height = 7)
    } else {
        return(plt)
    }
}
plot_te_presentation = function(res, filename = NULL, palette = theme_colors, ylims = NULL, n_obs_position = NULL, ylab = "", xlab = "Arm", caption = "Bars represent 95% confidence intervals."){
    plt = res |> ggplot(aes(x = Arm, color = Arm)) +
        geom_point(aes(y = beta), size = 3) +
        geom_errorbar(aes(ymin = beta - 1.96 * se, ymax = beta + 1.96* se), width = .1, linewidth = 1) +
        theme_bw() +
        theme(
            axis.line.y = element_line(colour="black"),
            panel.border = element_blank(), 
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.position = "none") +
        scale_color_manual(values = palette$color, breaks = palette$arm_coded, labels = palette$arm_coded) +
        scale_y_continuous(limits = ylims, expand = c(0.1, 0)) +
        geom_hline(aes(yintercept = 0)) +
        labs(y = ylab, x = xlab, caption = caption) +
        theme(
            plot.title = element_text(size = 20, face = "bold"),
            axis.title = element_text(size = 20),
            axis.text = element_text(size = 20),
            #increase size of caption text
            plot.caption = element_text(size = 18),
            plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"))
    
    # add number of observations
    if (is.null(n_obs_position)) {
      n_obs_position <- min(res$beta - 1.96 * res$se)
    }
    
    plt = plt +
      geom_text(aes(label = paste0('italic(N) == ', '"', formatC(n_obs, big.mark = ','), '"'),
                    y = n_obs_position, hjust = 0.5, vjust = 1.75), color = 'black', size = 6, check_overlap = TRUE, parse = TRUE)

    if (!is.null(filename)){
        ggsave(paste0(filename, "_presentation.pdf"), plot = plt, width = 10, height = 7)
        ggsave(paste0(filename, "_presentation.png"), plot = plt, width = 10, height = 7)
    } else {
        return(plt)
    }
}
# plots TEs for misinfo and non-misinfo, differentiated by shape of the dot
plot_te_presentation2 = function(res, filename = NULL, palette = theme_colors, ylims = NULL, n_obs_position = NULL, ylab = "", xlab = "Arm", caption = "Bars represent 95% confidence intervals."){
    plt = res |> ggplot(aes(x = Arm, color = Arm, shape = info_type)) +
        geom_point(aes(y = beta), size = 3, position=position_dodge(width=0.75)) +
        geom_errorbar(aes(ymin = beta - 1.96 * se, ymax = beta + 1.96* se), width = .1, linewidth = 1, position=position_dodge(width=0.75)) +
        theme_bw() +
        theme(
            axis.line.y = element_line(colour="black"),
            panel.border = element_blank(), 
            panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            legend.position = "bottom") +
        scale_color_manual(values = palette$color, breaks = palette$arm_coded, labels = palette$arm_coded) +
        scale_y_continuous(limits = ylims, expand = c(0.1, 0)) +
        geom_hline(aes(yintercept = 0)) +
        labs(y = ylab, x = xlab, caption = caption) +
        guides(color=FALSE) +
        theme(
            plot.title = element_text(size = 20, face = "bold"),
            axis.title = element_text(size = 20),
            axis.text = element_text(size = 20),
            #increase size of caption text
            plot.caption = element_text(size = 18),
            plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
            # increase legend text size
            legend.title=element_blank(), # suppress legend title
            legend.position = c(0.9, 0.95),
            legend.text=element_text(size=18))
    print(plt)
    # add number of observations
    if (is.null(n_obs_position)) {
      n_obs_position <- min(res$beta - 1.96 * res$se)
    }
    
    plt = plt +
      geom_text(aes(label = paste0('italic(N) == ', '"', formatC(n_obs, big.mark = ','), '"'),
                    y = n_obs_position, hjust = 0.5, vjust = 1.75), color = 'black', size = 6, check_overlap = TRUE, parse = TRUE)

    if (!is.null(filename)){
        # change filename to not overwrite files
        ggsave(paste0(filename, "_presentation.pdf"), plot = plt, width = 10, height = 7)
        ggsave(paste0(filename, "_presentation.png"), plot = plt, width = 10, height = 7)
    } else {
        return(plt)
    }
}
#### Both Phases Combined
data_all = data_all |>
    group_by(country_coded, payment_condition_dollar, subphase) |>
    mutate(
        strata = paste0(country_coded, "_$", payment_condition_dollar, "_", subphase)
    )


outcomes = c("mean_share_misinfo", "mean_share_nonmisinfo", "sharing_discernment", "mean_reliable_misinfo", "mean_reliable_nonmisinfo", "reliability_discernment", "mean_manipulative_misinfo", "mean_manipulative_nonmisinfo", "manipulation_discernment")

outcomes_discern = c("sharing_discernment", "reliability_discernment", "manipulation_discernment")

outcomes_info = c("mean_share_")


for (outcome in outcomes_info) {
  
    outcome_misinfo <- paste0(outcome, "misinfo")
    outcome_nonmisinfo <- paste0(outcome, "nonmisinfo")
  
    # Get Estimates
    res_misinfo = run_sdim(
        df = data_all |> filter(quiz_completed) |> filter(arm_coded %in% c("Original Baseline", "Long Baseline", "SMS")),
        depvar = outcome_misinfo,
        indepvar = "arm_coded",
        stratavar = "strata"
    )

    res_nonmisinfo = run_sdim(
        df = data_all |> filter(quiz_completed) |> filter(arm_coded %in% c("Original Baseline", "Long Baseline", "SMS")),
        depvar = outcome_nonmisinfo,
        indepvar = "arm_coded",
        stratavar = "strata"
    )    
    
    # Plot Estimates
    res_misinfo = res_misinfo |>
        filter(str_detect(varnames, "arm_coded")) |>
        mutate(Arm = str_remove(varnames, "arm_coded::")) |>
        mutate(
            Arm = case_when(
                Arm == "SMS" ~ "Emotions Course",
                Arm ==  "Long Baseline" ~ "Placebo (Long)",
                TRUE ~ Arm
        )) |>
        mutate(Arm = factor(Arm, levels = c("Placebo (Long)", "Emotions Course"))) #, "Video", "Game"
    
    res_nonmisinfo = res_nonmisinfo |>
        filter(str_detect(varnames, "arm_coded")) |>
        mutate(Arm = str_remove(varnames, "arm_coded::")) |>
        mutate(
            Arm = case_when(
                Arm == "SMS" ~ "Emotions Course",
                Arm ==  "Long Baseline" ~ "Placebo (Long)",
                TRUE ~ Arm
        )) |>
        mutate(Arm = factor(Arm, levels = c("Placebo (Long)", "Emotions Course"))) #, "Video", "Game"
    
    res = rbind(res_misinfo %>% mutate(info_type = "Misinfo"), 
                res_nonmisinfo %>% mutate(info_type = "Nonmisinfo"))
    
    ylims <- c(min(res$beta - 1.96 * res$se), max(res$beta + 1.96 * res$se))
    n_obs_position <- min(res$beta - 1.96 * res$se)
    
  plot_te_presentation2(
        res,
        ylims = ylims,
        n_obs_position = n_obs_position,
        file = paste0("data/presentation", paste0(str_sub(outcome, end = -2), "_both_phases")),
        ylab = paste0("ATE on ", dict[outcome])
    )
    
    ggsave(paste0("data/presentation/", paste0(str_sub(outcome, end = -2), "_both_phases"), ".pdf"))
    ggsave(paste0("data/presentation/", paste0(str_sub(outcome, end = -2), "_both_phases"), ".png"))
}
## [1] "Running SDIM for mean_share_misinfo ~ arm_coded | strata"
## [1] "Running SDIM for mean_share_nonmisinfo ~ arm_coded | strata"

Sharing compared to pooled placebos with Lee Bounds

plot_lee_bounds = function(outcome, survey, title = NULL, spec = "With Covars"){

    res = lee_bounds_results |> 
        filter(Outcome == outcome, Survey == survey, Spec == spec) |>
        mutate(
            Type = case_when(
                str_detect(Type, "Misinfo") ~ "Misinformation",
                str_detect(Type, "Non-misinfo") ~ "Non-misinformation",
                TRUE ~ Type
            )
        )

    # Plot with ggplot
    plt = ggplot(res, aes(x = lb, y = Type)) +
        labs(x = "Treatment Effects of Emotions Course vs Placebo (combined)", y = "Outcome", color = "") +
        theme_minimal() +
        theme(plot.title = element_text(size = 20, face = "bold"),
            axis.title = element_text(size = 18),
            axis.text = element_text(size = 15),
            legend.text = element_text(size = 18),
            plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm")) +
        geom_errorbarh(aes(xmin = ate_ci_low, xmax = ate_ci_high), height = 0.2, color = "steelblue", linewidth = 0.8) +
        geom_point(aes(x = lb), color = "firebrick", size = 2, shape = 17) +
        geom_point(aes(x = ub), color = "firebrick", size = 2, shape = 17) +
        geom_vline(xintercept = 0, linetype = "dashed", color = "grey")

}

a <- plot_lee_bounds("Sharing", "Main", title = "Sharing Intention: Main Survey")
ggsave("data/presentation/sharing_intention_main.pdf", a, width = 10, height = 7)
ggsave("data/presentation/sharing_intention_main.png", a, width = 10, height = 7)

Slide 12

Discernment compared to orginal placebo

outcome <- "sharing_discernment"

### Main survey
data_all = data_all |>
    group_by(country_coded, payment_condition_dollar, subphase) |>
    mutate(strata = paste0(country_coded, "_$", payment_condition_dollar, "_", subphase))

# Get Estimates
res_main = run_sdim(
  # exclude game and video conditions at this stage
  df = data_all |> filter(quiz_completed) |> 
    # filter conditions
    filter(arm_coded %in% c("Original Baseline", "Long Baseline", "SMS")),
  depvar = outcome,
  indepvar = "arm_coded",
  stratavar = "strata"
  # specify no filename to avoid overwriting files
)
## [1] "Running SDIM for sharing_discernment ~ arm_coded | strata"
# Plot Estimates
res_main = res_main |>
  filter(str_detect(varnames, "arm_coded")) |>
  mutate(Arm = str_remove(varnames, "arm_coded::")) |>
  mutate(
    Arm = case_when(
      Arm == "SMS" ~ "Emotions Course",
      Arm ==  "Long Baseline" ~ "Placebo (Long)",
      TRUE ~ Arm
    )) |>
  mutate(Arm = factor(Arm, levels = c("Placebo (Long)", "Emotions Course")))

### Follow-up Survey

data_followup = data_all |> filter(quiz_completed_followup) |> 
  # filter conditions
  filter(arm_coded %in% c("Original Baseline", "Long Baseline", "SMS")) |> 
    group_by(country_coded, payment_condition_dollar, subphase) |>
    mutate(strata = paste0(country_coded, "_$", payment_condition_dollar, "_", subphase))

res_followup = run_sdim(
  df = data_followup,
  depvar = paste0(outcome, "_followup"),
  indepvar = "arm_coded",
  stratavar = "strata",
)
## [1] "Running SDIM for sharing_discernment_followup ~ arm_coded | strata"
res_followup = res_followup |>
  filter(str_detect(varnames, "arm_coded")) |>
  mutate(Arm = str_remove(varnames, "arm_coded::")) |>
  mutate(Arm = case_when(
    Arm == "SMS" ~ "Emotions Course",
    Arm ==  "Long Baseline" ~ "Placebo (Long)",
    TRUE ~ Arm
  )) |>
  mutate(Arm = factor(
    Arm,
    levels = c("Placebo (Long)", "Emotions Course")
  ))

res_combined <- rbind(res_main %>% mutate(Arm = paste0(Arm, "\nMain")),
                      res_followup %>% mutate(Arm = paste0(Arm, "\nFollow-Up"))) |>
  mutate(Arm = factor(Arm, levels = c("Placebo (Long)\nMain", "Placebo (Long)\nFollow-Up", "Emotions Course\nMain", "Emotions Course\nFollow-Up")))
theme_colors_combined <- tibble(arm_coded = c("Placebo (Long)\nMain", "Placebo (Long)\nFollow-Up", "Emotions Course\nMain", "Emotions Course\nFollow-Up"),
                                color = c("#1E88E5", "#0f4e85", "#4CAF50","#154f25"))
# plots TEs for main and follow up, differentiated by x-axis label and color
res <- res_combined
palette <- theme_colors_combined # to update
ylab <- ""
xlab <- "Arm"
caption <- "Bars represent 95% confidence intervals."
filename <- paste0("data/presentation", paste0(outcome, "_both_phases"))

ylims = c(-0.1, 0.2)
plt = res %>%
  ggplot(aes(x = Arm, color = Arm)) +
  geom_point(aes(y = beta),
             size = 2,
             position = position_dodge(width = 0.75)) +
  geom_errorbar(
    aes(ymin = beta - 1.96 * se, ymax = beta + 1.96 * se),
    width = .1,
    linewidth = 1,
    position = position_dodge(width = 0.75)
  ) +
  theme_bw() +
  theme(
    axis.line.y = element_line(colour = "black"),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    legend.position = "bottom"
  ) +
        scale_color_manual(values = palette$color, breaks = palette$arm_coded, labels = palette$arm_coded) +
  scale_y_continuous(limits = ylims, expand = c(0.1, 0)) +
  geom_hline(aes(yintercept = 0)) +
  labs(y = ylab, x = xlab, caption = caption) +
  guides(color = FALSE) +
  theme(
    plot.title = element_text(size = 20, face = "bold"),
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 12),
    #increase size of caption text
    plot.caption = element_text(size = 15),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    # increase legend text size
    legend.title = element_blank(),
    # suppress legend title
    legend.position = c(0.9, 0.95),
    legend.text = element_text(size = 12)
  ) +
  guides(shape = guide_legend(reverse = TRUE, override.aes = list(color = c("grey40", "black"))))
    
# add number of observations
n_obs_position <- min(res$beta - 1.96 * res$se)

b = plt +
  geom_text(
    aes(
      label = paste0('italic(N) == ', '"', formatC(n_obs, big.mark = ','), '"'),
      y = n_obs_position,
      hjust = 0.5,
      vjust = 1.75
    ),
    color = 'black',
    size = 4,
    position=position_dodge(width = 0.75),
    check_overlap = FALSE,
    parse = TRUE
  )

ggsave(paste0("data/presentation/", paste0(outcome, "_both_phases"), ".pdf"), b, width = 10, height = 7)
ggsave(paste0("data/presentation/", paste0(outcome, "_both_phases"), ".png"), b, width = 10, height = 7)


print(b)

Discernment Compared to Orginal Placebo with Lee Bounds

lee_bounds_results = lee_bounds_results |> mutate(
    Survey = factor(Survey, levels = c("Main", "Follow-up")))

plot_lee_bounds_combined = function(outcome, title = NULL, spec = "With Covars"){

    res = lee_bounds_results |> filter(Outcome == outcome, Spec == spec) |>
            mutate(
            Type = case_when(
                str_detect(Type, "Misinfo") ~ "Misinformation",
                str_detect(Type, "Non-misinfo") ~ "Non-misinformation",
                TRUE ~ Type
            )
        )

    # Plot with ggplot
    plt = ggplot(res, aes(x = lb, y = Type, color = Survey)) +
        geom_errorbarh(aes(xmin = ate_ci_low, xmax = ate_ci_high), height = 0.2, size = 0.8, position = position_dodge(width = 0.5)) +
        geom_point(aes(x = lb), size = 2, shape = 17, position = position_dodge(width = 0.5)) +
        geom_point(aes(x = ub), size = 2, shape = 17, position = position_dodge(width = 0.5)) +
        geom_vline(xintercept = 0, linetype = "dashed", color = "grey") +
        labs(x = "Treatment Effects of Emotions Course vs Placebo (combined)", y = "Outcome", color = "") +
        theme_minimal() +
        theme(plot.title = element_text(size = 16, face = "bold"),
            axis.title = element_text(size = 14),
            axis.text = element_text(size = 10),
            plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
            legend.position = c(0.8, 0.9),
            #make legend text larger
            legend.text = element_text(size = 14),
            ) +
        scale_color_manual(values = c("#1f77b4", "#ff7f0e"))

    if(!is.null(title)){
        plt = plt + ggtitle(title)
    } else {
        plt = plt + ggtitle(paste0("Lee Bounds: ", outcome, " - ", survey))
    }


}

c <- plot_lee_bounds_combined("Sharing", title = "Sharing Intention")
ggsave("data/presentation/sharing_intention_leebounds.pdf", c, width = 10, height = 7)
ggsave("data/presentation/sharing_intention_leebounds.png", c, width = 10, height = 7)

print(c)