# cleaned chatfuel data
df <- read.csv("~/Documents/git/fb_misinfo_interventions/data/cleaned_data.csv") 
# data dictionary 
data_dictionary <- read.csv("~/Documents/git/fb_misinfo_interventions/data/data_dictionary.csv")
# ads data by ad
ads <- read.csv("~/Documents/git/fb_misinfo_interventions/data/cleaned_ads.csv")
# ads data by age and gender
ads_demo <- read.csv("~/Documents/git/fb_misinfo_interventions/data/cleaned_ads_age_gender.csv")

# order of variables by chatbot-treatment combination
detailed_funnel <- read_sheet("https://docs.google.com/spreadsheets/d/1XqKAkhG0-3621pr8kYWxa-MhnkiLC-Og4eO1LXT2bb0/edit#gid=371270037")

# this has ggplot theme preset
source(paste0("https://raw.githubusercontent.com/scmcdonald/visualization/main/theme_sarah.R"))

# load function that defines factor levels so tables and figures have correct ordering
source("~/Documents/git/fb_misinfo_interventions/pilot_analysis/factor_cols.R")
df <- factor_cols(df)

# define discrete colorblind palette
cb_colors <- brewer.pal(n = 8, name = "Dark2")
cb_colors_extra <- brewer.pal(n = 8, name = "Set2")
cb_colors <- c(cb_colors, cb_colors_extra)
# format numbers to percent
form_percent <- function(dec){
  if(is.na(dec)){
    return("-")
  } else {
    return(paste0(round(dec, 4) * 100, "%"))
  }
}

# format number dollar type
form_cost <- function(num){
  
  sapply(num, function(x) if (x == Inf| is.na(x)) {
    return("-")
  } else {
    return(paste0("$", round(x, 3)))
  })
  
}

# round number to 4 with trailing 0s
# converts to string
round_4 <- function(x){
  formatC(x, digits = 4, format = "f")
}

# format number with commas for 
# large numbers without decimals
form_num <- function(num){
  
  sapply(num, function(x) if (x == Inf| is.na(x)) {
    return("-")
  } else {
    return( formatC(x, big.mark = ","))
  })
  
}

# this puts sms to all caps and control to Control
label_fun <- function(x){
    ifelse(nchar(x) <4 & x != "all", toupper(x), str_to_title(x))
  }

1 Project Objectives

The goal of the Facebook Misinformation Project is to experimentally evaluate scaled versions of promising interventions to reduce misinformation spread on social media using on-platform behavioral data.

2 Pilot Objectives

The pilot has four objectives:

  1. Determine a range of cost per impression and funnel parameters (ad CTR and completion rates at each stage of the chatbot);

  2. Evaluate the performance of ads;

  3. Test different versions of the chatbot intro section;

  4. Test the treatment performance (core intervention: SMS course + content replacement + consequences nudge) on survey proxy outcomes delivered through the chatbot

3 Purpose of Analysis Script

The analysis script cleans and analyzes pilot data from Ads Manager, Chatfuel, and chatbot responses. The output includes the following tables:

  • Descriptive Statistics

  • Funnel Statistics

  • Treatment Analysis

4 Samples

4.1 Targeting Strategy

Our targeting strategy is estimated to target 53 million FB users, which is 22% of total FB users in the US (240 million). The pilot target users (1) in the union of the top 10 states that are most conservative + top 20 most rural states (2) OR in the top 2,500 largest ZIP codes by population in other states that have high republican votes (# votes for Republican candidates / # total votes > 95%) or high rural population rate (% rural population > 85%) (3) Among users in (1) or (2), target users with age 35+.

4.2 Randomization Strategy

4.2.1 Ad-Level

The pilot includes samples to test 9 versions of ads - Outrage 1, Outrage 2, Attention 1, Attention 2, Real 1, Real 2, Fake News 1, Fake News 2, Fake.

4.2.1.1 Outrage

4.2.1.1.1 Ad 1

4.2.1.1.2 Ad 2

4.2.1.2 Attention

4.2.1.2.1 Ad 1

4.2.1.2.2 Ad 2

4.2.1.3 Real

4.2.1.3.1 Ad 1

4.2.1.3.2 Ad 2

4.2.1.4 Fake News

4.2.1.4.1 Ad 1

4.2.1.4.2 Ad 2

4.2.1.5 Fake

4.2.2 Chatbot-Level

After users click on the ads, they are randomized to be connected with one of the 4 chatbot conditions: two pre-consent chat conditions to test two different factoids (Factoid 1, Factoid 2), and two post-consent intro conditions to test the performance of the intro section with vs. without an additional motivator (Consent only, Motivator).

1a Consent only no factoid no motivator
1b Motivator no factoid motivator
2a Factoid 1 factoid 1 no motivator
2b Factoid 2 factoid 2 no motivator

4.2.3 Intervention-Level

After users consent to participate in the study, they are randomized into an intervention group and a control group. The intervention group is exposed to the SMS course, consequences nudge and content replacement; the control group is exposed to the facts baseline course only.

5 Main Takeaways

5.1 Overall Pilot Performance

  • Over a period of one week, we loaded 9 ads and got 53,876 impressions at a total cost of $2000.

  • Cost per impression: Overall, the cost per impression is $0.037. Across the 9 ads, the cost per impression ranges from $0.019 to $0.043.

    • Compared with the VH project (campaign in African countries w/ incentives), our cost per impression is an order of magnitude more expensive (cost per impression for the VH project is $0.003).

    • Our cost is also higher than the average FB ads cost from this article ($0.012).

  • Funnel statistics:

    • Conversation start rate (# conversations started / # impressions) = 0.27%

    • Consent rate (# consents / # conversations started) = 41.96%

    • Compared with the VH project, our conversation start rate and consent rate are slightly lower, and completion rate is much lower. Funnel stats for the VH project (pilot 8): conversation start rate=0.37%, consent rate=58.3%, completion rate=81.6%.

  • Our ads are getting a lot of engagement: total engagement = 1,355 (incl. ads comments=386, ads reactions=300, ads shares=65), and engagement rate (# engagement / # impressions) is 2.5%.

5.2 Performance of ads

  • Summary of ads found here.

  • Number of impressions is determined by the FB algorithm, where users’ likelihood to engage is an important factor. According to this metric, FB algorithm determines that the ad Fake and Outrage 2 perform the best. However, costs per impression for the ad Fake and Outrage 2 are also the highest across the ads.

  • Considering both number of impressions and cost per impression, the ad Real 1 and Real 2 perform the best - they rank the 4th and 5th ads to obtain the most impressions, and the 3rd and the 4th ads with the lowest cost per impression.

  • Another metric to evaluate the ads is conversation start rate (# conversations started / # impressions) to understand which ad is most likely to attract users to start a conversation with our chatbot. Using this metric, the ad Real 1 and the ad Fake perform the best (Real 2 is also not bad).

  • Lessons learned based on ads comments:

    • People think OTHER people are being manipulated online, not themselves. The ads incite a bit of defensiveness as users think we are claiming they are the ones being manipulated.

      • We can do better to play their sense of superiority into the framing.
      • We may want to reevaluate our project name “Project Don’t Get Duped” (“Project DGD” may work better).
    • The general conspiratorial mindset of our target sample means that they are quick to assume we are also out to manipulate them, or that we are going to position ourselves as the TRUTH.

      • We want to avoid phrases like “let us help”, or the term “disinformation” (It appears to be understood to mean “things I believe are true that you don’t want me to believe” for deep conspiracists).
    • There’s a valid question/concern from several commenters that we are also an anonymous group with unclear/suspicious motives.

      • We need to think about ways to improve our credibility, and avoid ads that may diminish our broader credibility.
    • Users engage with us through ads comments, rather than entering the chatbot.

      • We need to make “Send us a message” more salient.

5.3 Performance of different chatbot intro sections

  • Consent rate is higher without factoids. Treatment/chatbot completion rates are not significantly higher with the additional motivator section.

    • The results imply that we want to make the intro section as concise as possible, and lead users to the intervention as soon as possible.
  • Lessons learned from chatbot conversations:

    • People do not understand how to interact with the chatbot (e.g. using pre-set buttons, inputting a number as responses).

      • We will make the instructions more clear from the very beginning (such as “Tap an option”). If a user gets stuck on a question twice, or sends free-text messages, the chatbot will proceed.
    • Some people only came in to send their thoughts or expect to chat with a real person - they do not like to follow the chat flow we have designed.

      • We need to think about ways to make the chatbot more engaging.

      • It may help to include the fact that this is a chatbot rather than a real person in the ads. The idea of chatbot is less intimidating, and has the advantage of setting expectations appropriately (Some users felt “played” or suspicious when they found it was a chatbot instead of a real person inside the chat).

5.4 Treatment performance on survey proxy outcomes

  • On average, results show that users who are exposed to the SMS intervention are less likely to share misinfo posts but more likely to share non-misinfo (baseline) posts than users in the control group (not statistically significant).

  • On the sharing discernment measure, which is defined as the difference between the likelihood rating of sharing a misinfo post and that of sharing a baseline post, we found significant improvement among users exposed to the SMS intervention.

  • Note that the sample size we collected is very low, and therefore these may not be representative comparisons.

6 Data Dictionary

The data dictionary below outlines each variable used in analysis from the Chatfuel dataset. Group denotes the section of the chatbot. Derived denotes if this variable was created during the cleaning process.

# print data dictionary
data_dictionary %>%
  select(cols, description,values, group, type, derived) %>%
  datatable(rownames = F)

7 Descriptive Statistics

7.1 Numeric Variables

This table reports the mean, standard deviations, and total number of non-missing observations for numeric variables. “All” denotes control and SMS arms together, note that individuals without an arm assignment are not included in these tables. Participants who do not consent to the study do not receive a treatment assignment.

7.1.1 Demographics

# summary for all treatments
df %>%
  select(data_dictionary[data_dictionary$group  == "demographics" & data_dictionary$numeric  == 1, "cols"], treatment) %>%
  select(!demog_completed) %>%
  # remove observations without treatment
  filter(treatment != "no treatment") %>%
  summarise_all(list(n= function(x) sum(!is.na(x)), 
                     mean = function(x) mean(x, na.rm = T), 
                     sd = function(x) sd(x, na.rm = T)))%>%
  pivot_longer(cols = everything(), 
               names_to = "variable",
               values_to = "value") %>% 
  separate(variable, into = c("variable", "measure"), sep="_(?=[^_]+$)") %>%
  pivot_wider(names_from = c("measure")) %>% 
  # summary by treatments
  left_join(
    df %>%
      select(data_dictionary[data_dictionary$group  == "demographics" & data_dictionary$numeric  == 1, "cols"], treatment) %>%
      filter(treatment != "no treatment") %>%
      select(!demog_completed) %>%
      group_by(treatment) %>%
      # get n, mean, sd by treatment
      summarise_all(list(n = function(x)sum(!is.na(x)), 
                         mean = function(x)mean(x, na.rm = T), 
                         sd = function(x)sd(x, na.rm = T))) %>%
      pivot_longer(cols = !treatment, 
                   names_to = "variable",
                   values_to = "value") %>% 
      separate(variable, into = c("variable", "measure"), sep="_(?=[^_]+$)") %>%
      pivot_wider(names_from = c("treatment", "measure")), by = "variable") %>%
  # round mean and sd to 4 digits but not n
  mutate_at(vars(contains(c("mean", "sd"))), round_4) %>%
  filter(variable != "treatment")  %>%
  kable(col.names = c("Variable", rep(c("N", "Mean", "SD"), 3))) %>%
  add_header_above(c(" " = 1, "All" = 3, "Control" = 3, "SMS" = 3))%>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
All
Control
SMS
Variable N Mean SD N Mean SD N Mean SD
demog_age 23 49.6087 10.5000 9 48.7778 7.9652 14 50.1429 12.1139
demog_education_num 21 2.6667 1.0165 8 2.8750 0.9910 13 2.5385 1.0500
demog_income_num 17 1.6471 1.3666 6 1.6667 1.0328 11 1.6364 1.5667
demog_political_num 16 1.7500 0.5774 6 1.6667 0.5164 10 1.8000 0.6325

7.1.2 Funnel Numeric

df %>%
  select(data_dictionary[data_dictionary$funnel == 1   & data_dictionary$numeric  == 1, "cols"], treatment) %>%
      filter(treatment != "no treatment")%>%
  summarise_all(list(n= function(x) sum(!is.na(x)), mean = function(x) mean(x, na.rm = T), sd = function(x) sd(x, na.rm = T)))%>%
  pivot_longer( cols = everything(), 
                names_to = "variable",
                values_to = "value") %>% 
  separate(variable, into = c("variable", "measure"), sep="_(?=[^_]+$)") %>%
  pivot_wider(names_from = c( "measure")) %>% 
  left_join(
    df %>%
      select(data_dictionary[data_dictionary$funnel == 1 & data_dictionary$numeric  == 1, "cols"], treatment) %>%
        filter(treatment != "no treatment") %>%
      group_by(treatment) %>%
      summarise_all(list(n= function(x)sum(!is.na(x)), mean = function(x)mean(x, na.rm = T), sd = function(x)sd(x, na.rm = T))) %>%
      pivot_longer( cols = !treatment, 
                    names_to = "variable",
                    values_to = "value") %>% 
      separate(variable, into = c("variable", "measure"), sep="_(?=[^_]+$)") %>%
      pivot_wider(names_from = c("treatment", "measure")), by = "variable") %>%
    # round mean and sd to 4 digits but not n
  mutate_at(vars(contains(c("mean", "sd"))), round_4) %>%
  filter(variable != "treatment")%>%
  # manually order variables
  mutate(variable = factor(variable, levels = c(
    "consent", 
    "intro_completed", 
    "quiz_completed",
    "control_arm_completed", 
    "consequence_nudge_completed", 
    "content_replacement_completed",
    "intervention_arms_completed", 
    "feedback_completed", 
    "demog_completed", 
    "full_completed"
  ))) %>% 
  # input "-" where variable is not recorded for a certain treatment
  mutate(control_mean = ifelse(variable %in% c("consequence_nudge_completed", "content_replacement_completed", "intervention_arms_completed"), "-", control_mean),
         control_sd = ifelse(variable %in% c("consequence_nudge_completed", "content_replacement_completed", "intervention_arms_completed"), "-", control_sd), 
         control_n = ifelse(variable %in% c("consequence_nudge_completed", "content_replacement_completed", "intervention_arms_completed"), "-", control_n), 
         sms_mean = ifelse(variable == "control_arm_completed", "-", sms_mean), 
         sms_sd = ifelse(variable == "control_arm_completed", "-", sms_sd),
         sms_n = ifelse(variable == "control_arm_completed", "-", sms_n))%>%
  arrange(variable) %>%
  kable(col.names = c("Variable", rep(c("N", "Mean", "SD"), 3))) %>%
  add_header_above(c(" " = 1, "All" = 3, "Control" = 3, "SMS" = 3))%>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
All
Control
SMS
Variable N Mean SD N Mean SD N Mean SD
consent 60 1.0000 0.0000 28 1.0000 0.0000 32 1.0000 0.0000
intro_completed 60 0.3667 0.4860 28 0.0000 0.0000 32 0.6875 0.4709
quiz_completed 60 0.2833 0.4544 28 0.0000 0.0000 32 0.5312 0.5070
control_arm_completed 60 0.2833 0.4544 28 0.6071 0.4973
consequence_nudge_completed 60 0.2833 0.4544
32 0.5312 0.5070
content_replacement_completed 60 0.2667 0.4459
32 0.5000 0.5080
intervention_arms_completed 60 0.3000 0.4621
32 0.5625 0.5040
feedback_completed 60 0.4000 0.4940 28 0.3571 0.4880 32 0.4375 0.5040
demog_completed 60 0.3667 0.4860 28 0.2857 0.4600 32 0.4375 0.5040
full_completed 60 0.3500 0.4810 28 0.2500 0.4410 32 0.4375 0.5040

7.1.3 Outcome

# outcome summary all treatments
df %>%
  select(sharing_baseline, sharing_misinfo, sharing_disc) %>%
  summarise_all(list(n= function(x) sum(!is.na(x)), mean = function(x) mean(x, na.rm = T), sd = function(x) sd(x, na.rm = T)))%>%
  pivot_longer( cols = everything(), 
                names_to = "variable",
                values_to = "value") %>% 
  separate(variable, into = c("variable", "measure"), sep="_(?=[^_]+$)") %>%
  pivot_wider(names_from = c( "measure")) %>% 
  # outcome summary by treatment
  left_join(
    df %>%
      select(sharing_baseline, sharing_misinfo, sharing_disc, treatment) %>%
      filter(treatment != "no treatment") %>%
      group_by(treatment) %>%
      summarise_all(list(n= function(x)sum(!is.na(x)), mean = function(x)mean(x, na.rm = T), sd = function(x)sd(x, na.rm = T))) %>%
      pivot_longer( cols = !treatment, 
                    names_to = "variable",
                    values_to = "value") %>% 
      separate(variable, into = c("variable", "measure"), sep="_(?=[^_]+$)") %>%
      pivot_wider(names_from = c("treatment", "measure")), by = "variable") %>%
  # round mean and sd to 4 digits but not n
  mutate_at(vars(contains(c("mean", "sd"))), round_4) %>%
  kable(col.names = c("Variable", rep(c("N", "Mean", "SD"), 3))) %>%
  add_header_above(c(" " = 1, "All" = 3, "Control" = 3, "SMS" = 3))%>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
All
Control
SMS
Variable N Mean SD N Mean SD N Mean SD
sharing_baseline 29 2.5690 1.1705 13 2.2692 1.0919 16 2.8125 1.2093
sharing_misinfo 29 1.7759 1.0228 13 2.0385 1.2823 16 1.5625 0.7274
sharing_disc 29 -0.7931 1.1380 13 -0.2308 0.9268 16 -1.2500 1.1106

7.1.4 Other Numeric

# summary overall
df %>%
  select(data_dictionary[data_dictionary$funnel != 1 & data_dictionary$group != "demographics"   & data_dictionary$numeric  == 1, "cols"]) %>% 
  select(!sharing_baseline& !sharing_misinfo &  !sharing_disc  & !chatfuel_user_id) %>%
  summarise_all(list(n= function(x) sum(!is.na(x)), mean = function(x) mean(x, na.rm = T), sd = function(x) sd(x, na.rm = T)))%>%
  pivot_longer( cols = everything(), 
                names_to = "variable",
                values_to = "value") %>% 
  separate(variable, into = c("variable", "measure"), sep="_(?=[^_]+$)") %>%
  pivot_wider(names_from = c( "measure")) %>% 
  # summary by treatments
  left_join(
    df %>%
      select(data_dictionary[data_dictionary$funnel != 1 & data_dictionary$group != "demographics" & data_dictionary$numeric  == 1, "cols"], treatment) %>%
  filter(treatment != "no treatment") %>%
      group_by(treatment) %>%
      summarise_all(list(n= function(x)sum(!is.na(x)), mean = function(x)mean(x, na.rm = T), sd = function(x)sd(x, na.rm = T))) %>%
      pivot_longer( cols = !treatment, 
                    names_to = "variable",
                    values_to = "value") %>% 
      separate(variable, into = c("variable", "measure"), sep="_(?=[^_]+$)") %>%
      pivot_wider(names_from = c("treatment", "measure")), by = "variable") %>%
    # round mean and sd to 4 digits but not n
  mutate_at(vars(contains(c("mean", "sd"))), round_4)  %>% 
  # if NaN or NA, replace with "-"
  mutate_all(~ replace(., .=="  NaN" | . == "   NA", "-")) %>%
  kable(col.names = c("Variable", rep(c("N", "Mean", "SD"), 3))) %>%
  add_header_above(c(" " = 1, "All" = 3, "Control" = 3, "SMS" = 3))%>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
All
Control
SMS
Variable N Mean SD N Mean SD N Mean SD
control 60 0.4667 0.5031 28 1.0000 0.0000 32 0.0000 0.0000
control_quiz_1 27 0.8519 0.3620 27 0.8519 0.3620 0
control_quiz_2 25 0.8000 0.4082 25 0.8000 0.4082 0
control_quiz_3 22 0.7273 0.4558 22 0.7273 0.4558 0
control_quiz_4 14 0.6429 0.4972 14 0.6429 0.4972 0
has_technical_issues 24 0.0417 0.2041 10 0.0000 0.0000 14 0.0714 0.2673
headline1_answer 17 0.7059 0.4697 0
17 0.7059 0.4697
headline2_answer 17 0.8824 0.3321 0
17 0.8824 0.3321
intro_go_on 143 0.1538 0.3621 28 0.0000 0.0000 32 0.6875 0.4709
outcome_proxy_complete 143 0.2028 0.4035 28 0.4643 0.5079 32 0.5000 0.5080
outcome_proxy_quiz1 29 2.2759 1.3601 13 1.8462 1.2142 16 2.6250 1.4083
outcome_proxy_quiz2 29 2.2759 1.2506 13 2.4615 1.3301 16 2.1250 1.2042
outcome_proxy_quiz3 29 1.8966 1.0805 13 2.0769 1.3205 16 1.7500 0.8563
share_chatbot 21 0.4762 0.5118 7 0.2857 0.4880 14 0.5714 0.5136
sms_quiz 22 0.7273 0.4558 0
22 0.7273 0.4558
try_me 143 0.1189 0.3248 28 0.0000 0.0000 32 0.5312 0.5070
what_bother_1 32 0.0938 0.2961 0
32 0.0938 0.2961
what_bother_2 32 0.0312 0.1768 0
32 0.0312 0.1768
what_bother_3 32 0.0000 0.0000 0
32 0.0000 0.0000
what_bother_4 32 0.0312 0.1768 0
32 0.0312 0.1768
what_bother_5 32 0.0312 0.1768 0
32 0.0312 0.1768
what_bother_6 32 0.6250 0.4919 0
32 0.6250 0.4919

7.2 Categorical Variables

This table reports the distribution of observations in each category for categorical variables. Note that individuals without an arm assignment are not included in these tables.

7.2.1 Demographics

#function to calculate count and shares of categorical variables
cat_summary <- function(variable){
  
  var <- sym(variable)
  
  df %>%
    filter(treatment != "no treatment") %>%
    select(!!var) %>%
    group_by(!!var) %>%
    count(name = "Count") %>%
    ungroup() %>%
    mutate(Share = round(Count/sum(Count), 2))%>%
    kable() %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
  
}

7.2.1.1 Gender

cat_summary("demog_gender") 
demog_gender Count Share
woman 9 0.15
non-binary 1 0.02
man 12 0.20
NA 38 0.63

7.2.1.2 Age Group

cat_summary("demog_age_group") 
demog_age_group Count Share
25-34 1 0.02
35-44 7 0.12
45-54 8 0.13
55-64 6 0.10
65+ 1 0.02
NA 37 0.62

7.2.1.3 Ethnicity

cat_summary("demog_ethnicity")
demog_ethnicity Count Share
not hispanic/latino 19 0.32
prefer not to say 3 0.05
NA 38 0.63

7.2.1.4 Race

cat_summary("demog_ethnicity")
demog_ethnicity Count Share
not hispanic/latino 19 0.32
prefer not to say 3 0.05
NA 38 0.63

7.2.1.5 Income

cat_summary("demog_income")
demog_income Count Share
less than $30,000 13 0.22
$50,000 - $69,999 3 0.05
more than $150,000 1 0.02
prefer not to say 4 0.07
NA 39 0.65

7.2.1.6 Education

cat_summary("demog_education")
demog_education Count Share
< high school 3 0.05
high school 5 0.08
some college 10 0.17
2-year degree 2 0.03
4-year degree 1 0.02
prefer not to say 1 0.02
NA 38 0.63

7.2.1.7 Political

cat_summary("demog_political")
demog_political Count Share
democrat 1 0.02
independent 10 0.17
republican 5 0.08
something else 6 0.10
NA 38 0.63

7.2.2 Other Categorical

7.2.2.1 Arm

The arm variable denotes participants’ treatment assignment.

cat_summary("treatment")
treatment Count Share
control 28 0.47
sms 32 0.53

7.2.2.2 Family Friend Check

Answer to the question: ❓How about you? If a family member or a friend posts something online, do you stop to check whether it’s misinformation?

A. I always trust my family and friends!

B. I always check whether something is misinformation.

cat_summary("family_friend_check")
family_friend_check Count Share
a 3 0.05
b 17 0.28
NA 40 0.67

7.2.2.3 Fear Quiz

Answer to the question: ❓What motive might someone have for wanting you to believe the ingredients of another product are safer?

A. To sell their product.

B. Because they genuinely think the other product is dangerous.

C. All of the above.

cat_summary("fear_quiz")
fear_quiz Count Share
a 7 0.12
c 12 0.20
NA 41 0.68

7.2.2.4 Anger Quiz

Answer to the question: Would you share this [anger inducing ad] on your social media feed?

A. Yes

B. No

cat_summary("anger_quiz")
anger_quiz Count Share
  1. yes
4 0.07
  1. no
15 0.25
NA 41 0.68

7.2.2.5 Per Unfollow

Answer to the question: What percentage of people do you think have unfollowed someone who shared misinformation?

10%

25%

50% or more

cat_summary("per_unfollow")
per_unfollow Count Share
10% 6 0.10
25% 3 0.05
50% or more 8 0.13
NA 43 0.72

7.2.2.6 Dog or Cat

Answer to the question: For example, which do you prefer… doggies? or cats?

cat_summary("dog_or_cat")
dog_or_cat Count Share
cats! 5 0.08
doggies, please. 11 0.18
NA 44 0.73

7.3 Visualization

7.3.1 Demographics

demog_figures <- function(variable){
   
  variable <- enquo(variable)

  
  df_plot <- df %>%
    select(treatment, !!variable) %>%
    filter(!is.na(treatment) & !is.na(!!variable)) %>%
    group_by(treatment, !!variable) %>%
    count()%>%
    group_by(treatment) %>%
    mutate(percent = paste(round(n/sum(n) * 100, 0), "%", sep = "")) %>%
    ungroup() %>%
    mutate(percent = case_when(!duplicated(!!variable) ~ 
       paste(percent, "\n", str_to_title(!!variable), sep = "")
      ,  
      TRUE ~ percent))
  
 x <- ggplot(df_plot, 
         aes(x = treatment, 
             y = n, 
             fill = !!variable)) +
    geom_bar(position = "fill", stat ="identity", width = .7, color = "white") +
    theme_sarah() +
   scale_x_discrete(labels = label_fun )+
    scale_fill_manual(values = c(cb_colors)) + 
    geom_text(aes( label = percent), 
              color = "white", 
              position = position_fill(vjust = 0.5), 
              size = 12/.pt, 
              fontface = "bold") + 
    labs(x = "", 
         y ="", 
         title = paste("Percentage of Participants by\n Treatment and", str_to_title(str_replace(str_remove(as_label(variable), "demog_"), "_", " ")), sep = " "), 
         caption = paste("Number of Observations:", scales::comma(sum(df_plot$n)))) +
    scale_y_continuous(breaks = c(0, 1), labels = c("0%", "100%"))  + 
    theme(legend.position = "none")
 
  print(x)
}

The next figures show the share of different demographic groups by treatment. Note that the following figures only include observations where the treatment and demographic variables are both non-missing.

for(i in data_dictionary[data_dictionary$group == "demographics" & data_dictionary$type == "category", "cols"]){
    
    cat("####", str_to_title(str_replace(str_remove(i, "demog_"), "_", " ")), "\n\n\n")
  
    demog_figures(!!sym(paste(i, sep = "")))
    
    cat("\n\n\n")
  }

7.3.1.1 Age Group

7.3.1.2 Education

7.3.1.3 Ethnicity

7.3.1.4 Gender

7.3.1.5 Income

7.3.1.6 Political

7.3.1.7 Race

7.3.2 Ads Demographics

Facebook Ads Manager provides cost, impression, and conversation counts at the ad-age-gender level. Below, we show the cost per impression and cost per conversation by age or gender and ad or ad theme. Note that here, we use the Facebook Ads definition of conversations.

# figure is a function of the variable measure (impressions or conversations)
# group (ad theme group or individual ads)
# demographic variable (age or gender)

plot_cost_per <- function(variable, group, demo){
  
  # choose a group to sort bars descending by
  # plot colors
  # filter out unknown gender group since it is a small size
  if(demo == "age"){
    
    label_group <-"65+"
    plot_colors <- cb_colors[3:6]
    
  } else if(demo == "gender"){
    
    label_group <-"female"
    plot_colors <- cb_colors[1:2]
    ads_demo <- ads_demo %>%
      filter(gender != "unknown")
    
  }

  # set plot title and x axis label
  plot_title <- paste("Cost per", str_to_title(str_remove(variable, "s$")), "by", str_to_title(demo), sep =" ")
  x_title <- paste("Cost per", str_to_title(str_remove(variable, "s$")), sep = " ")
  
  group <- enquo(group)
  variable <- sym(variable)
  demo <- sym(demo)

  # calculate cost per measure by group
  plot_data <- ads_demo  %>%
    select(!!group, !!demo, !!variable, cost) %>%
    group_by(!!group, !!demo) %>%
    summarize_all(sum) %>%
    mutate(cost_var = cost/  !!variable, 
          cost_var = ifelse(is.infinite(cost_var), 0, cost_var)
    ) %>%
    group_by(!!demo) %>%
    # order for plotting
    mutate(ordervar = ifelse(!!demo == label_group, rank(cost_var), 10)) %>%
    ungroup() 
  
  # this finds which ad or ad theme has the max cost per measure for female or 65+. 
  # this is where the demographic group labels are plotted
  labels <- plot_data %>%
    filter(!!demo == label_group) %>%
    filter(cost_var ==max(cost_var)) %>%
    pull(!!group)
  
  # create labels for demographic groups
  plot_data <- plot_data %>%
    mutate(demo_label = ifelse(!!group== labels, paste(str_to_title(!!demo), " "), ""))
 
  # plot bars        
  ggplot(plot_data, 
         aes(y = reorder(!!group, ordervar),
             x = cost_var, 
             group = !!demo, 
             fill = !!demo)) + 
    geom_bar(stat = "identity", 
           position = "dodge", 
           color = "white") +
    theme_sarah() +
    theme(legend.position = "none", 
          panel.grid.major.y = element_blank()) +
    labs(y = "", 
     x = x_title, 
     title = plot_title) +
    # set colors
    scale_fill_manual(values = plot_colors) +
    # this plots bar demographic labels
    geom_text(aes(label = demo_label),
              position = position_dodge(0.9),
              color="white",
              vjust = 0.5,
              hjust = 1)
}

# only thing to add after running function is 
# the scale limits and breaks, which is done manually
# since its different for each combination

7.3.2.1 Cost per Impression

7.3.2.1.1 Gender-Ad

Note that Facebook Ads also has a gender category called unknown. Since the size of this category is so small, we do no included these observations below.

plot_cost_per("impressions", ad_name, "gender") +
scale_x_continuous(limits = c(0, 0.065), 
                   expand = c(0,0), 
                   breaks = seq(0, 0.06, 0.01))

7.3.2.1.2 Gender-Ad Theme

Note that Facebook Ads also has a gender category called unknown. Since the size of this category is so small, we do no included these observations below.

plot_cost_per("impressions", theme_group, "gender") +
  scale_x_continuous(limits = c(0, 0.061), 
                     expand = c(0,0), 
                     breaks = seq(0, 0.06, 0.01)) 

7.3.2.1.3 Age-Ad
plot_cost_per("impressions", ad_name, "age") +
  scale_x_continuous(limits = c(0, 0.056), 
                     expand = c(0,0), 
                     breaks = seq(0, 0.055, 0.01)) 

7.3.2.1.4 Age-Ad Theme
plot_cost_per("impressions", theme_group, "age") +
scale_x_continuous(limits = c(0, 0.052), 
                   expand = c(0,0), 
                   breaks = seq(0, 0.05, 0.01)) 

7.3.2.2 Cost per Conversation

7.3.2.2.1 Gender-Ad

Note that Facebook Ads also has a gender category called unknown. Since the size of this category is so small, we do no included these observations below. We calculate cost per conversation = cost / #conversations. Missing bars indicates that we could not calculate cost per conversation because there were zero conversations for that subgroup.

plot_cost_per("conversations", ad_name, "gender") +
  scale_x_continuous(limits = c(0, 65), 
                     expand = c(0,0), 
                     breaks = seq(0, 60, 10)) 

7.3.2.2.2 Gender-Ad Theme

Note that Facebook Ads also has a gender category called unknown. Since the size of this category is so small, we do no included these observations below.

plot_cost_per("conversations", theme_group, "gender") +
  scale_x_continuous(limits = c(0, 25.5),
                     expand = c(0,0),
                     breaks = seq(0, 25, 5)) 

7.3.2.2.3 Age-Ad

We calculate cost per conversation = cost / #conversations. Missing bars indicates that we could not calculate cost per conversation because there were zero conversations for that subgroup.

plot_cost_per("conversations", ad_name, "age") +
  scale_x_continuous(limits = c(0, 65),
                     expand = c(0,0),
                     breaks = seq(0, 65, 10)) 

7.3.2.2.4 Age-Ad Theme

We calculate cost per conversation = cost / #conversations. Missing bars indicates that we could not calculate cost per conversation because there were zero conversations for that subgroup.

plot_cost_per("conversations", theme_group, "age") +
  scale_x_continuous(limits = c(0, 35.5),
                     expand = c(0,0),
                     breaks = seq(0, 35, 5)) 

8 Funnel Statistics

8.1 Table 1: Summary of Results

This table presents an overview of funnel statistics at the ad-, chatbot-, and treatment-level.

8.1.1 Ad-level

All responses, regardless of treatment-level, are included. Note that here, we use the Facebook Ads definition of conversations.

ads %>%
  mutate(cost_per_impression = form_cost(cost/impressions),
         cost_per_consent = form_cost(cost/consents), 
         cost_per_click = form_cost(cost/clicks),
         cost_per_completion = form_cost(cost/completions), 
         impressions = form_num(impressions), 
         clicks = form_num(clicks)) %>%
  select("Ad" = ad_name, 
         "Impressions" = impressions,
         "Clicks" = clicks,  
         "Conversations" = conversations, 
         "Consents" = consents,
         "Completions" = completions, 
         "Cost per Impression" = cost_per_impression, 
         "Cost per Click" = cost_per_click,
         "Cost per Consent" = cost_per_consent, 
         "Cost per Completion" = cost_per_completion) %>%
    arrange(Ad) %>%
    kable(escape = F) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Ad Impressions Clicks Conversations Consents Completions Cost per Impression Cost per Click Cost per Consent Cost per Completion
Attention 1 1,983 105 5 2 1 $0.04 $0.765 $40.155 $80.31
Attention 2 2,778 160 1 0 0 $0.03 $0.522
Fake 18,192 1,099 65 25 7 $0.042 $0.697 $30.644 $109.443
Fake News 1 1,148 97 1 0 0 $0.025 $0.301
Fake News 2 2,954 254 4 2 1 $0.019 $0.222 $28.17 $56.34
No Ad ID
5 4 2
Outrage 1 4,274 231 9 7 3 $0.037 $0.677 $22.353 $52.157
Outrage 2 12,629 711 25 11 4 $0.043 $0.767 $49.576 $136.335
Real 1 3,733 108 15 6 1 $0.027 $0.939 $16.908 $101.45
Real 2 6,185 305 13 3 2 $0.029 $0.595 $60.443 $90.665

8.1.2 Chatbot-level

All users who started a conversation are included.

Cost per Consent = total experiment costs / number of consents. Cost per Completion = total experiment costs / number of completions. Conversations = Number of chatfuel observations.

Note that here, we use the Chatfuel metric of conversations - the number of chatfuel observations. FB Ads Manager also records a conversations variable, but we assume the Chatfuel metric unless otherwise noted (i.e. in ads analyses).

total_cost <- sum(ads$cost, na.rm  =T)

df %>%
  select(intro_version, consent, full_completed) %>%
  group_by(intro_version) %>%
  # add consents and completions because they are binary
  summarize(consents = sum(consent, na.rm = T),
            completions = sum(full_completed, na.rm = T),
            # number of chatfuel conversations
            conversations = length(intro_version) ) %>%
  mutate(cost_per_consent = form_cost(total_cost/consents), 
         cost_per_completion = form_cost(total_cost/completions), 
         treatment = "all") %>%
  # Rename variables to titles
  select("Chatbot" = intro_version, 
         "Treatment" = treatment,
         "Conversations" = conversations, 
         "Consents" = consents,
         "Completions" = completions, 
         "Cost per Consent" = cost_per_consent, 
         "Cost per Completion" = cost_per_completion
      ) %>%
  kable(escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Chatbot Treatment Conversations Consents Completions Cost per Consent Cost per Completion
1a all 35 17 6 $117.647 $333.333
1b all 36 20 8 $100 $250
2a all 35 9 2 $222.222 $1000
2b all 37 14 5 $142.857 $400

8.1.3 Treatment-level

All users who consented are included.

Cost per Consent = total experiment costs / consents. Cost per Completion = total experiment costs / completions.

# treatment level
df %>%
  select(treatment, consent, full_completed) %>%
  filter(treatment != "no treatment") %>%
  group_by(treatment) %>%
  summarize(consents = sum(consent, na.rm  = T),
            completions = sum(full_completed, na.rm  = T)) %>%
  mutate(cost_per_consent = form_cost(total_cost/consents), 
         cost_per_completion = form_cost(total_cost/completions), 
         treatment = label_fun(treatment)) %>%
  # all treatments
  rbind(df %>%
          filter(!is.na(treatment)) %>%
          select( consent, full_completed) %>%
          summarize(consents = sum(consent, na.rm  = T),
                    completions = sum(full_completed, na.rm  = T)) %>%
          mutate(cost_per_consent = form_cost(total_cost/consents), 
                 cost_per_completion = form_cost(total_cost/completions), 
                 treatment = "All")) %>%
  # rename column names to titles
  select("Treatment" = treatment,
         "Consents" = consents,
         "Completions" = completions, 
         "Cost per Consent" = cost_per_consent, 
         "Cost per Completion" = cost_per_completion) %>%
  kable(escape = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Treatment Consents Completions Cost per Consent Cost per Completion
Control 28 7 $71.429 $285.714
SMS 32 14 $62.5 $142.857
All 60 21 $33.333 $95.238

8.2 Table 2: Overall Funnel

This table shows the detailed funnel by all, treatment, and control participants. Here, “All” refers to all participants from the stage of ad impressions, including those who did not start a conversation or did not consent to participate in the study (these users were not assigned to an arm). “SMS participants” refer to consenting users who are assigned to the SMS intervention arm, and “Control participants” refer to consenting users who are assigned to the control arm.

Funnel Parameters

  1. Ad clickthrough (%) = #clicks / #impressions
  2. Conversation started (%) = #conversations / #clicks
  3. Consent obtained (%) = #consents / #conversations
  4. Treatment completed (%) = #forking section completed / # consents
  5. Outcome completed (%) = #outcome completed / #forking section completed
  6. Feedback completed (%) = #feedback section completed / #outcome completed
  7. Demographics Completed (%) = #demographic section completed / #feedback section completed
  8. Full survey completed (%) = #full survey completed / #demographic section completed
get_funnel <- function(group, ad = NULL){
  
  # filter data for ad level analysis
  if(!is.null(ad)){
    df_temp <- df[df$ad_name == ad & !is.na(df$ad_name), ]
    ads_temp <- ads[ads$ad_name == ad & !is.na(ads$ad_name), ]
  } else{
    df_temp <- df
    ads_temp <- ads
  }
  
  # filter data for treatment level analysis
  if(group == "sms"){
    df_temp <- df_temp[df_temp$control == 0 & !is.na(df_temp$control), ]
  } else if (group == "control"){
    df_temp <- df_temp[df_temp$control == 1 & !is.na(df_temp$control), ]
  } else if (group == "all"){
    df_temp <- df_temp
  }
  
  # calculate total costs, impressions, clicks, and conversations
  cost <- sum(ads_temp$cost, na.rm = T)
  impressions <- sum(ads_temp$impressions, na.rm = T)
  clicks <- sum(ads_temp$clicks, na.rm = T)
  conversations <- nrow(df_temp)
  
  consents <- sum(df_temp$consent, na.rm = T)
  treatment_complete <- sum(df_temp$intervention_arms_completed) + sum(df_temp$control_arm_completed)
  outcome_complete <- sum(!is.na(df_temp$sharing_misinfo), na.rm = T)
  feedback_complete <- sum(df_temp$feedback_completed)
  demog_complete <- sum(df_temp$demog_completed)
  full_complete <- sum(df_temp$full_completed)
  
  metric = c(
    "Impressions", 
    "Ad Clickthrough",
    "Conversation Started",
    "Consent Obtained", 
    "Treatment Completed",
    "Outcome Complete",
    "Feedback Complete", 
    "Demographics Complete", 
    "Full Complete")
  
    
  costs <- c(form_cost(cost/impressions), 
              form_cost(cost/clicks), 
             form_cost(cost/conversations), 
             form_cost(cost/consents),
             form_cost(cost/treatment_complete), 
             form_cost(cost/outcome_complete), 
             form_cost(cost/feedback_complete),
             form_cost(cost/demog_complete),
             form_cost(cost/full_complete))
    
  counts = c(scales::comma(impressions), 
             scales::comma(clicks), 
             scales::comma(conversations), 
             scales::comma(consents),
             scales::comma(treatment_complete),
             scales::comma(outcome_complete),
             scales::comma(feedback_complete),
             scales::comma(demog_complete), 
             scales::comma(full_complete))
    
  percent = c(
    # Impressions
    "-", 
    # (1) Ad clickthrough (%) = #clicks / #impressions
    form_percent(clicks / impressions),
    # (2) Conversation started (%) = #conversations / #clicks
    form_percent(conversations / clicks),
    # (3) Consent obtained (%) = #consents / #conversations
    form_percent(consents / conversations),
    # (4) Core treatment completed (%) = #forking section completed / # consents
    form_percent(treatment_complete / consents),
    # (6) Demo questions completed (%) = #demographic section completed / #treatment section completed
    form_percent(outcome_complete / treatment_complete),
    # (7) Full survey completed (%) = #full chat completed / #demographic section completed
    form_percent(feedback_complete / outcome_complete),
    form_percent(demog_complete / feedback_complete),
    form_percent(full_complete / demog_complete)
  )
  
  percent_of_impressions = c(
 # Impressions
    "-", 
    # (1) Ad clickthrough (%) = #clicks / #impressions
    form_percent(clicks / impressions),
    # (2) Conversation started (%) = #conversations / #clicks
    form_percent(conversations / impressions),
    # (3) Consent obtained (%) = #consents / #conversations
    form_percent(consents / impressions),
    # (5) Full treatment completed (%) = #treatment section completed / #forking section completed
    form_percent(treatment_complete / impressions),
    # (6) Demo questions completed (%) = #demographic section completed / #treatment section completed
    form_percent(outcome_complete / impressions),
    # (7) Full survey completed (%) = #full chat completed / #demographic section completed
    form_percent(feedback_complete / impressions),
    form_percent(demog_complete / impressions),
    form_percent(full_complete / impressions)
    
  )
  
  
  
    percent_of_consents = c(

    "-", 
    "-",
    "-",
    "-",
    form_percent(treatment_complete / consents),
    # (6) Demo questions completed (%) = #demographic section completed / #treatment section completed
    form_percent(outcome_complete / consents),
    # (7) Full survey completed (%) = #full chat completed / #demographic section completed
    form_percent(feedback_complete / consents),
    form_percent(demog_complete / consents),
    form_percent(full_complete / consents)
    
  )
    
     if(group == "all" & is.null(ad)){
          dropoff_1 <- data.frame(cbind(metric, counts, percent, percent_of_impressions, costs))
     } else {
       dropoff_1 <- data.frame(cbind(metric, counts, percent, percent_of_consents))
     }
       
  if(!is.null(ad)){
    if(ad == "No Ad ID"){
      dropoff_1[dropoff_1$metric == "Impressions", c("counts", "percent", "percent_of_consents") ] <- "-"
      dropoff_1[dropoff_1$metric == "Ad Clickthrough", c("counts", "percent", "percent_of_consents")] <- "-"
      dropoff_1[dropoff_1$metric == "Conversation Started", "percent"] <- "-"
    }
  }
    
    if(group == "all" & is.null(ad)){

      
      colnames(dropoff_1) <- c("Metric", "Number of Obs.", 
                           "Percent of Previous", "Percent of Impressions", "Cost per Stage")
      
    } else {
      
       dropoff_1[dropoff_1$metric %in% c("Impressions", "Ad Clickthrough", "Conversation Started"), c("counts", "percent")] <- "-"
     dropoff_1[dropoff_1$metric == "Consent Obtained", "percent"] <- "-"
     
     colnames(dropoff_1) <- c("Metric", "Number of Obs.", 
                           "Percent of Previous", "Percent of Consents")
      
      }
    
  
    print(kable(dropoff_1) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")))
    
}

8.2.1 All Participants

get_funnel(group = "all")
Metric Number of Obs. Percent of Previous Percent of Impressions Cost per Stage
Impressions 53,876
$0.037
Ad Clickthrough 3,070 5.7% 5.7% $0.651
Conversation Started 143 4.66% 0.27% $13.986
Consent Obtained 60 41.96% 0.11% $33.333
Treatment Completed 35 58.33% 0.06% $57.143
Outcome Complete 29 82.86% 0.05% $68.966
Feedback Complete 24 82.76% 0.04% $83.333
Demographics Complete 22 91.67% 0.04% $90.909
Full Complete 21 95.45% 0.04% $95.238

8.2.2 SMS Participants

get_funnel("sms")
Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 32
Treatment Completed 18 56.25% 56.25%
Outcome Complete 16 88.89% 50%
Feedback Complete 14 87.5% 43.75%
Demographics Complete 14 100% 43.75%
Full Complete 14 100% 43.75%

8.2.3 Control Participants

get_funnel("control")
Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 28
Treatment Completed 17 60.71% 60.71%
Outcome Complete 13 76.47% 46.43%
Feedback Complete 10 76.92% 35.71%
Demographics Complete 8 80% 28.57%
Full Complete 7 87.5% 25%

8.3 Table 3: Ad Split

This table shows the detailed funnel by ad split for all, treatment, and control participants. Here, “All” refers to all participants from the stage of ad impressions, including those who did not start a conversation or did not consent to participate in the study (these users were not assigned to an arm).

df <- merge(df, ads[, c("ad_id", "ad_name")], by = "ad_id", all.x = T)

for(i in sort(unique(df$ad_name))) {
  cat("###",  "Ad", i, "{.tabset} \n")
  for(j in c("all", "sms", "control")){
    
    cat("####", paste(label_fun(j), "Participants", sep = " "), " \n")
    print(get_funnel(ad = i, group = j))

    cat("  \n")
  }
  cat("  \n")
}

8.3.1 Ad Attention 1

8.3.1.1 All Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 2
Treatment Completed 2 100% 100%
Outcome Complete 2 100% 100%
Feedback Complete 2 100% 100%
Demographics Complete 2 100% 100%
Full Complete 1 50% 50%
NULL

8.3.1.2 SMS Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 1
Treatment Completed 1 100% 100%
Outcome Complete 1 100% 100%
Feedback Complete 1 100% 100%
Demographics Complete 1 100% 100%
Full Complete 1 100% 100%
NULL

8.3.1.3 Control Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 1
Treatment Completed 1 100% 100%
Outcome Complete 1 100% 100%
Feedback Complete 1 100% 100%
Demographics Complete 1 100% 100%
Full Complete 0 0% 0%
NULL

8.3.2 Ad Attention 2

8.3.2.1 All Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 0
Treatment Completed 0
Outcome Complete 0
Feedback Complete 0
Demographics Complete 0
Full Complete 0
NULL

8.3.2.2 SMS Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 0
Treatment Completed 0
Outcome Complete 0
Feedback Complete 0
Demographics Complete 0
Full Complete 0
NULL

8.3.2.3 Control Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 0
Treatment Completed 0
Outcome Complete 0
Feedback Complete 0
Demographics Complete 0
Full Complete 0
NULL

8.3.3 Ad Fake

8.3.3.1 All Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 25
Treatment Completed 11 44% 44%
Outcome Complete 8 72.73% 32%
Feedback Complete 8 100% 32%
Demographics Complete 7 87.5% 28%
Full Complete 7 100% 28%
NULL

8.3.3.2 SMS Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 11
Treatment Completed 5 45.45% 45.45%
Outcome Complete 4 80% 36.36%
Feedback Complete 4 100% 36.36%
Demographics Complete 4 100% 36.36%
Full Complete 4 100% 36.36%
NULL

8.3.3.3 Control Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 14
Treatment Completed 6 42.86% 42.86%
Outcome Complete 4 66.67% 28.57%
Feedback Complete 4 100% 28.57%
Demographics Complete 3 75% 21.43%
Full Complete 3 100% 21.43%
NULL

8.3.4 Ad Fake News 1

8.3.4.1 All Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 0
Treatment Completed 0
Outcome Complete 0
Feedback Complete 0
Demographics Complete 0
Full Complete 0
NULL

8.3.4.2 SMS Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 0
Treatment Completed 0
Outcome Complete 0
Feedback Complete 0
Demographics Complete 0
Full Complete 0
NULL

8.3.4.3 Control Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 0
Treatment Completed 0
Outcome Complete 0
Feedback Complete 0
Demographics Complete 0
Full Complete 0
NULL

8.3.5 Ad Fake News 2

8.3.5.1 All Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 2
Treatment Completed 1 50% 50%
Outcome Complete 1 100% 50%
Feedback Complete 1 100% 50%
Demographics Complete 1 100% 50%
Full Complete 1 100% 50%
NULL

8.3.5.2 SMS Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 2
Treatment Completed 1 50% 50%
Outcome Complete 1 100% 50%
Feedback Complete 1 100% 50%
Demographics Complete 1 100% 50%
Full Complete 1 100% 50%
NULL

8.3.5.3 Control Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 0
Treatment Completed 0
Outcome Complete 0
Feedback Complete 0
Demographics Complete 0
Full Complete 0
NULL

8.3.6 Ad No Ad ID

8.3.6.1 All Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 4
Treatment Completed 2 50% 50%
Outcome Complete 2 100% 50%
Feedback Complete 2 100% 50%
Demographics Complete 2 100% 50%
Full Complete 2 100% 50%
NULL

8.3.6.2 SMS Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 4
Treatment Completed 2 50% 50%
Outcome Complete 2 100% 50%
Feedback Complete 2 100% 50%
Demographics Complete 2 100% 50%
Full Complete 2 100% 50%
NULL

8.3.6.3 Control Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 0
Treatment Completed 0
Outcome Complete 0
Feedback Complete 0
Demographics Complete 0
Full Complete 0
NULL

8.3.7 Ad Outrage 1

8.3.7.1 All Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 7
Treatment Completed 5 71.43% 71.43%
Outcome Complete 5 100% 71.43%
Feedback Complete 3 60% 42.86%
Demographics Complete 3 100% 42.86%
Full Complete 3 100% 42.86%
NULL

8.3.7.2 SMS Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 4
Treatment Completed 3 75% 75%
Outcome Complete 3 100% 75%
Feedback Complete 2 66.67% 50%
Demographics Complete 2 100% 50%
Full Complete 2 100% 50%
NULL

8.3.7.3 Control Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 3
Treatment Completed 2 66.67% 66.67%
Outcome Complete 2 100% 66.67%
Feedback Complete 1 50% 33.33%
Demographics Complete 1 100% 33.33%
Full Complete 1 100% 33.33%
NULL

8.3.8 Ad Outrage 2

8.3.8.1 All Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 11
Treatment Completed 6 54.55% 54.55%
Outcome Complete 5 83.33% 45.45%
Feedback Complete 4 80% 36.36%
Demographics Complete 4 100% 36.36%
Full Complete 4 100% 36.36%
NULL

8.3.8.2 SMS Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 7
Treatment Completed 4 57.14% 57.14%
Outcome Complete 4 100% 57.14%
Feedback Complete 3 75% 42.86%
Demographics Complete 3 100% 42.86%
Full Complete 3 100% 42.86%
NULL

8.3.8.3 Control Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 4
Treatment Completed 2 50% 50%
Outcome Complete 1 50% 25%
Feedback Complete 1 100% 25%
Demographics Complete 1 100% 25%
Full Complete 1 100% 25%
NULL

8.3.9 Ad Real 1

8.3.9.1 All Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 6
Treatment Completed 5 83.33% 83.33%
Outcome Complete 3 60% 50%
Feedback Complete 2 66.67% 33.33%
Demographics Complete 1 50% 16.67%
Full Complete 1 100% 16.67%
NULL

8.3.9.2 SMS Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 2
Treatment Completed 1 50% 50%
Outcome Complete 0 0% 0%
Feedback Complete 0
0%
Demographics Complete 0
0%
Full Complete 0
0%
NULL

8.3.9.3 Control Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 4
Treatment Completed 4 100% 100%
Outcome Complete 3 75% 75%
Feedback Complete 2 66.67% 50%
Demographics Complete 1 50% 25%
Full Complete 1 100% 25%
NULL

8.3.10 Ad Real 2

8.3.10.1 All Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 3
Treatment Completed 3 100% 100%
Outcome Complete 3 100% 100%
Feedback Complete 2 66.67% 66.67%
Demographics Complete 2 100% 66.67%
Full Complete 2 100% 66.67%
NULL

8.3.10.2 SMS Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 1
Treatment Completed 1 100% 100%
Outcome Complete 1 100% 100%
Feedback Complete 1 100% 100%
Demographics Complete 1 100% 100%
Full Complete 1 100% 100%
NULL

8.3.10.3 Control Participants

Metric Number of Obs. Percent of Previous Percent of Consents
Impressions
Ad Clickthrough
Conversation Started
Consent Obtained 2
Treatment Completed 2 100% 100%
Outcome Complete 2 100% 100%
Feedback Complete 1 50% 50%
Demographics Complete 1 100% 50%
Full Complete 1 100% 50%
NULL

8.4 Table 4: Chatbot Split

This table shows the detailed funnel by chatbot split. Note that all users who started a conversation are included.

get_stat_by_chatbot = function(chatbot_num) {

  df_ads  <- df[df$intro_version %in% chatbot_num,]

  conversations <- nrow(df_ads)
  
  consents <- sum(df_ads$consent, na.rm = T)
  treatment_complete <- sum(df_ads$intervention_arms_completed) + sum(df_ads$control_arm_completed)
  outcome_complete <- sum(!is.na(df_ads$sharing_misinfo), na.rm = T)
  feedback_complete <- sum(df_ads$feedback_completed)
  demog_complete <- sum(df_ads$demog_completed)
  full_complete <- sum(df_ads$full_completed)
  
  metric = c(
    "Conversation Started",
    "Consent Obtained", 
    "Treatment Completed",
    "Outcome Complete",
    "Feedback Complete", 
    "Demographics Complete", 
    "Full Complete")
  

  
    
  costs <- c("-", 
            "-", 
             "-", 
             "-",
             "-",
             "-", 
             "-")
    
  counts = c(conversations, 
             consents, 
             treatment_complete, 
             outcome_complete, 
             feedback_complete, 
             demog_complete, 
             full_complete)
    
  prop = c( NA, 
            consents/conversations, 
            treatment_complete/consents,
            outcome_complete/treatment_complete, 
            feedback_complete/outcome_complete, 
            demog_complete/feedback_complete,
            full_complete/demog_complete
      
  )
  
  
  percent_of_consents <- c("-", "-", 
                           
                           form_percent(treatment_complete / consents), 
                           form_percent(outcome_complete / consents), 
                           form_percent(feedback_complete / consents), 
                           form_percent(demog_complete / consents),
                           form_percent(full_complete / consents))
  
  
  percent <- unlist(lapply(prop, form_percent))
  final_counts <- unlist(lapply(counts, scales::comma))
  
  dropoff_1 <- cbind("Metric" = metric, 
                     final_counts, percent, percent_of_consents, costs
                     )
    
  ## prop table
  proportion_table <- as.data.frame(
    cbind("Metric" = c("Consents",
                       "Treatment Complete", 
                       "Full Complete"), 
          "Numerator" = c(consents, treatment_complete, full_complete), 
          "Denominator" = c(conversations, consents, consents)))

  colnames(dropoff_1) <- c(
    "Metric", 
    "Number of Obs.",
    "Percent of Previous",
    "Percent of Consents",
    "Cost")

  
  return(list(dropoff_1, proportion_table))
}

8.4.1 Chatbot 1a

get_stat_by_chatbot("1a")[[1]] %>%
  kable() %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Metric Number of Obs. Percent of Previous Percent of Consents Cost
Conversation Started 35
Consent Obtained 17 48.57%
Treatment Completed 12 70.59% 70.59%
Outcome Complete 8 66.67% 47.06%
Feedback Complete 8 100% 47.06%
Demographics Complete 6 75% 35.29%
Full Complete 6 100% 35.29%

8.4.2 Chatbot 1b

get_stat_by_chatbot("1b")[[1]] %>%
  kable() %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Metric Number of Obs. Percent of Previous Percent of Consents Cost
Conversation Started 36
Consent Obtained 20 55.56%
Treatment Completed 11 55% 55%
Outcome Complete 11 100% 55%
Feedback Complete 9 81.82% 45%
Demographics Complete 9 100% 45%
Full Complete 8 88.89% 40%

8.4.3 Chatbot 2a

get_stat_by_chatbot("2a")[[1]] %>%
  kable() %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Metric Number of Obs. Percent of Previous Percent of Consents Cost
Conversation Started 35
Consent Obtained 9 25.71%
Treatment Completed 4 44.44% 44.44%
Outcome Complete 3 75% 33.33%
Feedback Complete 2 66.67% 22.22%
Demographics Complete 2 100% 22.22%
Full Complete 2 100% 22.22%

8.4.4 Chatbot 2b

get_stat_by_chatbot("2b")[[1]] %>%
  kable() %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Metric Number of Obs. Percent of Previous Percent of Consents Cost
Conversation Started 37
Consent Obtained 14 37.84%
Treatment Completed 8 57.14% 57.14%
Outcome Complete 7 87.5% 50%
Feedback Complete 5 71.43% 35.71%
Demographics Complete 5 100% 35.71%
Full Complete 5 100% 35.71%

8.5 Proportional Tests for Chatbots

In this table, we use a proportion test to compare the difference in means between chatbots. The chatbots are defined as follows:

1a Consent only no factoid no motivator
1b Motivator no factoid motivator
2a Factoid 1 factoid 1 no motivator
2b Factoid 2 factoid 2 no motivator

Metrics:

Consent Rate = #consents/ #conversations started

Treatment Completion Rate = #treatment section completed / #consents

Full Chatbot Completion Rate = #full chatbot completed / #consents

prop_out <- function(chatbot1, chatbot2){
  
  props1 <- get_stat_by_chatbot(chatbot1)[[2]]
  props2 <- get_stat_by_chatbot(chatbot2)[[2]]
  out <- list()
  for(metric in unique(props1$Metric)){
  
    x1 = as.numeric(props1[props1$Metric == metric, "Numerator"])
    x2 = as.numeric(props2[props2$Metric == metric, "Numerator"])
    n1 = as.numeric(props1[props1$Metric == metric, "Denominator"])
    n2 = as.numeric(props2[props2$Metric == metric, "Denominator"])
    
    prop_test <- stats::prop.test(x =  c(x1, x2), n = c(n1, n2), conf.level = 0.95, correct = F)
    
    mean_difference <-paste(round_4(prop_test$estimate[1] - prop_test$estimate[2]))
    se_difference <- round_4(sqrt((x1/n1)*(1 - (x1/n1))/n1 + (x2/n2)*(1 - (x2/n2))/n2))
    p_val <- round_4(prop_test$p.value)
  
    ci <- paste("(", paste(round_4(prop_test$conf.int), collapse = ", "), ")", sep = "")
    
    if(metric == "Consents"){
      
      metric <- "Consent Rate"
      
    } else if(metric == "Treatment Complete"){
      
      metric <- "Treatment Completion Rate"
      
    } else if(metric == "Full Complete"){
      
      metric <- "Full Chatbot Completion Rate"
        
    }
    
    out[[metric]] <- list(`Metric` = metric, 
           `Mean Difference` = mean_difference, `SE of the Difference` = se_difference, `p-value` = p_val, `95% Confidence Interval` = ci)
  }
 
  final <- as.data.frame(do.call(rbind, out)) 
  return(final)
  
}

8.5.3 Factoid 1 (2a) vs. Factoid 2 (2b)

Note: Mean Difference = 2a - 2b. A positive mean difference indicates that 2a better performance than 2b, while and a negative mean difference indicates that 2b better performance than 2a.

prop_out("2a", "2b") %>%
  kable(row.names = F) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
Metric Mean Difference SE of the Difference p-value 95% Confidence Interval
Consent Rate -0.1212 0.1087 0.2701 (-0.3343, 0.0918)
Treatment Completion Rate -0.1270 0.2120 0.5518 (-0.5424, 0.2885)
Full Chatbot Completion Rate -0.1349 0.1887 0.4925 (-0.5047, 0.2349)

8.6 Chatbot-Treatment Questionwise Funnel

detailed_funnel_fun <- function(x){
  if(any(is.na(x))){
    sum(!is.na(x))
  } else {
    if(is.numeric(x)){
      sum(x)
    } else {
      sum(!is.na(x))
    }
  }
}

for(treat in c("sms", "control")){
   cat("###", paste(label_fun(treat), "{.tabset}", sep = " "), " \n")
  
  for(chatbot in c("1a", "1b", "2a", "2b")){
    
    cat("####", paste("Chatbot", chatbot, sep = " "), " \n")
    
    filter_treat <- ifelse(treat == "control", 1, 0)
    
  plot_df <-  df %>%
    filter(intro_version == chatbot &  control == filter_treat) %>%
    select(detailed_funnel[detailed_funnel$treatment == treat & detailed_funnel$version == chatbot, "variable"] %>%pull()) %>%
    summarize_all(.funs =detailed_funnel_fun) %>%
    pivot_longer(cols = everything(), values_to = "count") %>%
    mutate(perc_of_consents = count/pull(.[.$name == "consent", "count"]))
  
  consents <- plot_df %>% filter(name == "consent") %>% pull(count)
  
  gg <- plot_df %>%
    mutate(name = factor(name, levels = rev(name))) %>%
    ggplot(aes(x = name, y = perc_of_consents, group = 1)) + 
    geom_point(size = 3) +
    geom_line() +
    coord_flip() +
    theme_sarah() +
    labs(x= "", 
         y = "Percent of Conversations", 
         title = paste("Number of Consents:", consents)) +
    scale_y_continuous(limits = c(0, 1), 
                       breaks = seq(0, 1, by = 0.2),
                       labels = scales::percent_format(accuracy = 1)) 
    
    
    
    plot_df %>%
      mutate(perc_of_consents = form_percent(perc_of_consents)) %>%
      kable(col.names = c("Variable", "Number. of Obs", "Percent of Consents")) %>%
      kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
      print()
    
    print(gg)

  
    cat("  \n\n\n")
  }
   cat("  \n\n\n")
}

8.6.1 SMS

8.6.1.1 Chatbot 1a

Variable Number. of Obs Percent of Consents
cont_0 6 66.67%
consent 9 100%
what_bother 9 100%
self_affirmation_continue 8 88.89%
intro_go_on 5 55.56%
sms_quiz 5 55.56%
family_friend_check 4 44.44%
fear_quiz 4 44.44%
anger_quiz 4 44.44%
try_me 4 44.44%
headline1_answer 4 44.44%
headline2_answer 4 44.44%
per_unfollow 4 44.44%
dog_or_cat 4 44.44%
outcome_proxy_quiz1 4 44.44%
outcome_proxy_quiz2 4 44.44%
outcome_proxy_quiz3 4 44.44%
enjoy_most 4 44.44%
enjoy_least 4 44.44%
user_comments 4 44.44%
has_technical_issues 4 44.44%
demog_age 4 44.44%
demog_gender 4 44.44%
demog_race 4 44.44%
demog_ethnicity 4 44.44%
demog_income 3 33.33%
demog_education 4 44.44%
demog_political 4 44.44%
share_chatbot 4 44.44%

8.6.1.2 Chatbot 1b

Variable Number. of Obs Percent of Consents
cont_0 11 78.57%
consent 14 100%
what_bother 14 100%
self_affirmation_continue 13 92.86%
intro_go_on 10 71.43%
sms_quiz 10 71.43%
family_friend_check 9 64.29%
fear_quiz 8 57.14%
anger_quiz 8 57.14%
try_me 7 50%
headline1_answer 7 50%
headline2_answer 7 50%
per_unfollow 7 50%
dog_or_cat 7 50%
outcome_proxy_quiz1 7 50%
outcome_proxy_quiz2 7 50%
outcome_proxy_quiz3 7 50%
enjoy_most 7 50%
enjoy_least 6 42.86%
user_comments 6 42.86%
has_technical_issues 6 42.86%
demog_age 6 42.86%
demog_gender 6 42.86%
demog_race 6 42.86%
demog_ethnicity 6 42.86%
demog_income 6 42.86%
demog_education 6 42.86%
demog_political 6 42.86%
share_chatbot 6 42.86%

8.6.1.3 Chatbot 2a

Variable Number. of Obs Percent of Consents
eiffel_continue 1 100%
cont_1 1 100%
cont_2 1 100%
cont_3 1 100%
cont_4 1 100%
cont_5 1 100%
cont_6 1 100%
consent 1 100%
what_bother 1 100%
self_affirmation_continue 1 100%
intro_go_on 1 100%
sms_quiz 1 100%
family_friend_check 1 100%
fear_quiz 1 100%
anger_quiz 1 100%
try_me 0 0%
headline1_answer 0 0%
headline2_answer 0 0%
per_unfollow 0 0%
dog_or_cat 0 0%
outcome_proxy_quiz1 0 0%
outcome_proxy_quiz2 0 0%
outcome_proxy_quiz3 0 0%
enjoy_most 0 0%
enjoy_least 0 0%
user_comments 0 0%
has_technical_issues 0 0%
demog_age 0 0%
demog_gender 0 0%
demog_race 0 0%
demog_ethnicity 0 0%
demog_income 0 0%
demog_education 0 0%
demog_political 0 0%
share_chatbot 0 0%

8.6.1.4 Chatbot 2b

Variable Number. of Obs Percent of Consents
flight_continue 8 100%
cont_1 8 100%
cont_2 8 100%
cont_3 8 100%
cont_4 8 100%
cont_5 8 100%
cont_6 8 100%
consent 8 100%
what_bother 8 100%
self_affirmation_continue 7 87.5%
intro_go_on 6 75%
sms_quiz 6 75%
family_friend_check 6 75%
fear_quiz 6 75%
anger_quiz 6 75%
try_me 6 75%
headline1_answer 6 75%
headline2_answer 6 75%
per_unfollow 6 75%
dog_or_cat 5 62.5%
outcome_proxy_quiz1 5 62.5%
outcome_proxy_quiz2 5 62.5%
outcome_proxy_quiz3 5 62.5%
enjoy_most 5 62.5%
enjoy_least 4 50%
user_comments 4 50%
has_technical_issues 4 50%
demog_age 4 50%
demog_gender 4 50%
demog_race 4 50%
demog_ethnicity 4 50%
demog_income 4 50%
demog_education 4 50%
demog_political 4 50%
share_chatbot 4 50%

8.6.2 Control

8.6.2.1 Chatbot 1a

Variable Number. of Obs Percent of Consents
cont_0 8 100%
consent 8 100%
control_quiz_1 6 75%
control_quiz_2 6 75%
control_quiz_3 7 87.5%
control_quiz_4 6 75%
outcome_proxy_quiz1 4 50%
outcome_proxy_quiz2 4 50%
outcome_proxy_quiz3 4 50%
enjoy_most 4 50%
enjoy_least 4 50%
user_comments 4 50%
has_technical_issues 4 50%
demog_age 3 37.5%
demog_gender 2 25%
demog_race 2 25%
demog_ethnicity 2 25%
demog_income 2 25%
demog_education 2 25%
demog_political 2 25%
share_chatbot 2 25%

8.6.2.2 Chatbot 1b

Variable Number. of Obs Percent of Consents
cont_0 5 83.33%
consent 6 100%
control_quiz_1 5 83.33%
control_quiz_2 5 83.33%
control_quiz_3 4 66.67%
control_quiz_4 3 50%
outcome_proxy_quiz1 4 66.67%
outcome_proxy_quiz2 4 66.67%
outcome_proxy_quiz3 4 66.67%
enjoy_most 3 50%
enjoy_least 3 50%
user_comments 3 50%
has_technical_issues 3 50%
demog_age 3 50%
demog_gender 3 50%
demog_race 3 50%
demog_ethnicity 3 50%
demog_income 3 50%
demog_education 3 50%
demog_political 3 50%
share_chatbot 2 33.33%

8.6.2.3 Chatbot 2a

Variable Number. of Obs Percent of Consents
eiffel_continue 8 100%
cont_1 8 100%
cont_2 8 100%
cont_3 8 100%
cont_4 8 100%
cont_5 8 100%
cont_6 8 100%
consent 8 100%
control_quiz_1 7 87.5%
control_quiz_2 7 87.5%
control_quiz_3 6 75%
control_quiz_4 3 37.5%
outcome_proxy_quiz1 3 37.5%
outcome_proxy_quiz2 3 37.5%
outcome_proxy_quiz3 3 37.5%
enjoy_most 3 37.5%
enjoy_least 2 25%
user_comments 2 25%
has_technical_issues 2 25%
demog_age 2 25%
demog_gender 2 25%
demog_race 2 25%
demog_ethnicity 2 25%
demog_income 2 25%
demog_education 2 25%
demog_political 2 25%
share_chatbot 2 25%

8.6.2.4 Chatbot 2b

Variable Number. of Obs Percent of Consents
flight_continue 6 100%
cont_1 6 100%
cont_2 6 100%
cont_3 6 100%
cont_4 6 100%
cont_5 6 100%
cont_6 6 100%
consent 6 100%
control_quiz_1 6 100%
control_quiz_2 4 66.67%
control_quiz_3 4 66.67%
control_quiz_4 2 33.33%
outcome_proxy_quiz1 2 33.33%
outcome_proxy_quiz2 2 33.33%
outcome_proxy_quiz3 2 33.33%
enjoy_most 1 16.67%
enjoy_least 1 16.67%
user_comments 1 16.67%
has_technical_issues 1 16.67%
demog_age 1 16.67%
demog_gender 1 16.67%
demog_race 1 16.67%
demog_ethnicity 1 16.67%
demog_income 1 16.67%
demog_education 1 16.67%
demog_political 1 16.67%
share_chatbot 1 16.67%

9 Treatment Analysis

The following figures and tables compare the treatment effects between the SMS and control arms for three outcomes: sharing misinformation, sharing baseline, and sharing discernment. Participants are quizzed on three posts in some combination of misinformation and baseline posts. They are asked to rate if they would share the post on a scale of 1 to 5: 1: Definitely not 2: Probably not 3: Not sure 4: Probably yes 5: Definitely yes. To calculate sharing misinformation for each participant, we average the rating for misinformation post for each participants. To calculate sharing baseline for each participant, we average the rating for baseline post for each participants. To calculate sharing discernment, we subtract the average sharing ratings: sharing misinformation - sharing baseline.

A rating of greater than 3 for sharing misinformation and sharing baseline indicate the user would be amenable to sharing the post. Sharing discernment scores can range from -4 to 4. The closer the score is to 0, the less discernment an individual has between misinformation and baseline posts. Positive scores indicate the participants is more amenable to sharing misinformation posts. Negative scores incidate the participant is ore amenable to sharing the baseline post.

estimate_effect <- function(df, outcome, outcome_name){
  
  
  # get mean and std. errors by group for plotting
  eq <- as.formula(paste(outcome, " ~ 0+ treatment"))
  lm <- lm_robust(eq, data = df[df$treatment != "no treatment", ])

  plot <- tidy(lm) 
  gg <- ggplot(plot, aes(x = term, y = estimate))  + 
    geom_point(position = position_dodge(width = 0.5), size = 3) +
    geom_pointrange(
      aes(x = term,ymin = estimate - 1.96 * std.error,ymax = estimate + 1.96 * std.error),width = .1,position = position_dodge(width = 0.5)) + 
    theme(plot.title = element_text(hjust = 0.5),
          axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1) 
          ) +
    scale_x_discrete(labels = function(x){
      label <- str_remove(x, "treatment")
      label <- ifelse(nchar(label) >3, str_to_title(label), toupper(label))
      return(label)
      }) + 
    xlab('Treatment') +
    ylab('Estimate') +
    ggtitle(paste0('Treatment Effect Estimate on ', outcome_name)) +
    theme_minimal() +
    theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 16), axis.text= element_text(size = 12), 
          axis.title = element_text(size = 16))

  if(outcome != "sharing_disc"){
    gg <- gg + scale_y_continuous(limits = c(0.5,5.5), 
                       breaks = seq(1, 5, 1))
  } else {
    gg <- gg + scale_y_continuous(limits = c(-4, 4), 
                       breaks = seq(-4, 4, 1))
  }
  
  print(gg)
    
  # get difference between groups
  eq <- as.formula(paste(outcome, " ~  treatment"))
  lm2 <- lm_robust(eq, data = df)
  
  out <- tidy(lm2)

  sms_n = sum(df$treatment == "sms")
  control_n = sum(df$treatment == "control")

  out[2, ] %>% 
    cbind( plot %>%
    select(term, estimate, std.error) %>%
    pivot_wider(names_from = "term", values_from = c("estimate", "std.error"))
    ) %>%
    mutate(estimate_treatmentsms = paste(round_4(estimate_treatmentsms), "\n(", round_4(std.error_treatmentsms), ")", sep = ""),
           estimate_treatmentcontrol = paste(round_4(estimate_treatmentcontrol), "\n(", round_4(std.error_treatmentcontrol), ")", sep = ""), 
           estimate_difference = paste(round_4(estimate), "\n(", round_4(std.error), ")", sep = ""), 
           conf = paste("(", round_4(conf.low), ", ", round_4(conf.high), ")", sep = ""),
           sms_n, control_n) %>%
    select("SMS Mean" = estimate_treatmentsms, 
           "SMS N" = sms_n, 
           "Control Mean" = estimate_treatmentcontrol, 
           "Control N" = control_n, 
           "Difference" = estimate_difference, 
           "95% Conf. Int." = conf, 
           "p-value" = p.value) %>%
  kable(row.names = F, escape = T) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  print()
}  

9.1 Outcome 1: Misinformation Sharing

estimate_effect(df, "sharing_misinfo", "Sharing Misinformation")  
SMS Mean SMS N Control Mean Control N Difference 95% Conf. Int. p-value
1.5625 (0.1819) 32 2.0385 (0.3556) 28 -0.4760 (0.3994) (-1.2955, 0.3436) 0.2437981

9.2 Outcome 2: Misinformation Baseline

estimate_effect(df, "sharing_baseline", "Sharing Baseline")  
SMS Mean SMS N Control Mean Control N Difference 95% Conf. Int. p-value
2.8125 (0.3023) 32 2.2692 (0.3028) 28 0.5433 (0.4279) (-0.3348, 1.4213) 0.2150843

9.3 Outcome 3: Sharing Discernment

estimate_effect(df, "sharing_disc", "Sharing Discernment")  
SMS Mean SMS N Control Mean Control N Difference 95% Conf. Int. p-value
-1.2500 (0.2776) 32 -0.2308 (0.2571) 28 -1.0192 (0.3784) (-1.7956, -0.2429) 0.0119954