Load Packages

library(tidyverse)
library(janitor)
library(knitr)
library(psych)

Import Data

# Read the cleaned survey export. Text columns are kept as character
# (stringsAsFactors = FALSE) rather than being converted to factors on import.
data <- read.csv(
  file = "cleaned_survey_data.csv",
  stringsAsFactors = FALSE
)

# Compact preview: one line per column with its type and first values.
glimpse(data)
## Rows: 34
## Columns: 20
## $ StartDate             <chr> "2026-03-11 18:04:45", "2026-03-16 17:09:27", "2…
## $ EndDate               <chr> "2026-03-11 18:05:08", "2026-03-16 17:10:12", "2…
## $ Status                <chr> "Survey Preview", "IP Address", "IP Address", "I…
## $ IPAddress             <chr> "", "136.168.93.87", "136.168.93.87", "136.168.9…
## $ Progress              <int> 100, 100, 100, 100, 100, 100, 100, 100, 14, 14, …
## $ Duration..in.seconds. <int> 22, 44, 100, 165, 280, 11, 47, 216, 8, 9, 93, 69…
## $ Finished              <chr> "True", "True", "True", "True", "True", "True", …
## $ RecordedDate          <chr> "2026-03-11 18:05:08", "2026-03-16 17:10:12", "2…
## $ ResponseId            <chr> "R_6tMzrMDtJIunazx", "R_6FT9SwEumVWfibn", "R_7r0…
## $ LocationLatitude      <dbl> 35.3288, 35.3407, 35.3407, 35.3407, 35.3407, 35.…
## $ LocationLongitude     <dbl> -118.9748, -119.0596, -119.0596, -119.0596, -119…
## $ DistributionChannel   <chr> "preview", "anonymous", "anonymous", "anonymous"…
## $ UserLanguage          <chr> "EN", "EN", "EN", "EN", "EN", "EN", "EN", "EN", …
## $ Q1_1                  <chr> "Yes", "", "", "", "", "", "Yes", "Yes", "Yes", …
## $ Q2_1                  <chr> "", "", "", "", "", "", "Multiple Times Per Day"…
## $ Q19_1                 <chr> "", "", "", "", "", "", "Very Often", "Rarely", …
## $ Q20_1                 <chr> "", "", "", "", "", "", "Neutral", "Somewhat Unt…
## $ Q21                   <chr> "", "", "", "", "", "", "No", "Yes", "", "", "No…
## $ Q4                    <chr> "", "", "", "", "", "", "None influence me", "Re…
## $ random                <dbl> 2, 1, 1, 2, 2, 2, 1, 2, NA, NA, 2, 2, NA, NA, 2,…

Clean and Prepare Data

# Standardise the column names with janitor::clean_names(), then coerce the
# two completion fields that the later filter relies on.
data <- clean_names(data)

data <- mutate(
  data,
  # Progress is forced to numeric; coercion warnings are deliberately muted.
  progress = suppressWarnings(as.numeric(progress)),
  # Map the usual spellings of true/false (text or 0/1) onto a logical.
  # Anything else falls through to as.logical(), which yields NA for values
  # it does not recognise.
  finished = case_when(
    finished %in% c("TRUE", "True", "true", "1") ~ TRUE,
    finished %in% c("FALSE", "False", "false", "0") ~ FALSE,
    TRUE ~ as.logical(finished)
  )
)

# Analysis sample: finished responses at 100% progress, excluding rows whose
# status marks them as a survey preview. Rows where any condition is NA are
# dropped by filter() as well.
survey <- filter(
  data,
  finished,
  progress == 100,
  status != "Survey Preview"
)

# Convert the condition flag and the question columns into labelled factors.
#
# Blank answers ("") are converted to NA first. Without this, the ordered
# items (q19_1, q20_1) silently drop blanks because "" is not one of their
# levels, while q2_1, q21 and q4 keep "" as a genuine category. That phantom
# category then shows up in frequency tables, inflates percentage
# denominators, and adds a column to the chi-squared table for q21.
#
# NOTE: recorded output in this report that lists an unlabeled (blank)
# category for q2_1, q21 or q4 was produced before this change and needs to
# be regenerated.
survey <- survey %>%
  mutate(
    # Treat unanswered questions as missing for every question column.
    across(c(q2_1, q19_1, q20_1, q21, q4), ~ na_if(.x, "")),
    # factor() compares values as text, so the numeric codes 1/2 also match
    # "1"/"2"; listing each level once is sufficient.
    random = factor(random,
                    levels = c(1, 2),
                    labels = c("Control", "Treatment")),
    # Ordered scales: level order defines the ranking used later when the
    # items are converted to numeric scores.
    q19_1 = factor(q19_1,
                   levels = c("Never", "Rarely", "Sometimes", "Very Often"),
                   ordered = TRUE),
    q20_1 = factor(q20_1,
                   levels = c("Very Untrustworthy",
                              "Somewhat Untrustworthy",
                              "Neutral",
                              "Somewhat Trustworthy",
                              "Very Trustworthy"),
                   ordered = TRUE),
    # Unordered categorical items; levels are taken from the observed values.
    q21 = factor(q21),
    q2_1 = factor(q2_1),
    q4 = factor(q4)
  )

# Structure check of the prepared analysis data.
str(survey)
## 'data.frame':    29 obs. of  20 variables:
##  $ start_date          : chr  "2026-03-16 17:09:27" "2026-03-16 17:16:06" "2026-03-16 17:28:23" "2026-03-16 17:31:19" ...
##  $ end_date            : chr  "2026-03-16 17:10:12" "2026-03-16 17:17:46" "2026-03-16 17:31:09" "2026-03-16 17:36:00" ...
##  $ status              : chr  "IP Address" "IP Address" "IP Address" "IP Address" ...
##  $ ip_address          : chr  "136.168.93.87" "136.168.93.87" "136.168.93.87" "136.168.93.87" ...
##  $ progress            : num  100 100 100 100 100 100 100 100 100 100 ...
##  $ duration_in_seconds : int  44 100 165 280 11 47 216 93 69 109 ...
##  $ finished            : logi  TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ recorded_date       : chr  "2026-03-16 17:10:12" "2026-03-16 17:17:47" "2026-03-16 17:31:09" "2026-03-16 17:36:00" ...
##  $ response_id         : chr  "R_6FT9SwEumVWfibn" "R_7r0OZWUHhBCm2Mm" "R_66K7tmLVI2vAjlv" "R_5MDmkmjK7azmwwk" ...
##  $ location_latitude   : num  35.3 35.3 35.3 35.3 35.3 ...
##  $ location_longitude  : num  -119 -119 -119 -119 -119 ...
##  $ distribution_channel: chr  "anonymous" "anonymous" "anonymous" "anonymous" ...
##  $ user_language       : chr  "EN" "EN" "EN" "EN" ...
##  $ q1_1                : chr  "" "" "" "" ...
##  $ q2_1                : Factor w/ 4 levels "","Few Times Per Week",..: 1 1 1 1 1 3 4 3 4 3 ...
##  $ q19_1               : Ord.factor w/ 4 levels "Never"<"Rarely"<..: NA NA NA NA NA 4 2 3 2 3 ...
##  $ q20_1               : Ord.factor w/ 5 levels "Very Untrustworthy"<..: NA NA NA NA NA 3 2 1 2 3 ...
##  $ q21                 : Factor w/ 3 levels "","No","Yes": 1 1 1 1 1 2 3 2 3 3 ...
##  $ q4                  : Factor w/ 7 levels "","Influencer promotions",..: 1 1 1 1 1 3 4 4 7 7 ...
##  $ random              : Factor w/ 2 levels "Control","Treatment": 1 1 2 2 2 1 2 2 2 2 ...

Descriptive Statistics

# Group sizes for the experimental condition: raw counts, then proportions
# of all rows in the analysis sample.
table(survey$random)
## 
##   Control Treatment 
##        17        12
prop.table(table(survey$random))
## 
##   Control Treatment 
## 0.5862069 0.4137931
# Frequency counts per question. table() leaves NA values out by default, so
# the ordered items (q19_1, q20_1) total 24 rather than 29.
# NOTE(review): in the recorded output for q2_1, q21 and q4 the unlabeled
# first entry (count 5) is the empty-string level, i.e. blank answers that
# are being counted as a category.
table(survey$q2_1)
## 
##                            Few Times Per Week Multiple Times Per Day 
##                      5                      1                     21 
##                 Rarely 
##                      2
table(survey$q19_1)
## 
##      Never     Rarely  Sometimes Very Often 
##          4          5         10          5
table(survey$q20_1)
## 
##     Very Untrustworthy Somewhat Untrustworthy                Neutral 
##                      4                      6                      8 
##   Somewhat Trustworthy       Very Trustworthy 
##                      4                      2
table(survey$q21)
## 
##      No Yes 
##   5  10  14
table(survey$q4)
## 
##                                                 
##                                               5 
##                           Influencer promotions 
##                                               9 
##                               None influence me 
##                                               3 
## Retargeted ads (ads based on previous searches) 
##                                               4 
##                     Sponsored posts from brands 
##                                               1 
##                                       Story ads 
##                                               1 
##                                       Video ads 
##                                               6
# Percentages of the tabulated answers. For q19_1 and q20_1 the denominator
# is the 24 non-missing answers; for q21 the recorded denominator is 29
# because the blank category is included.
prop.table(table(survey$q19_1)) * 100
## 
##      Never     Rarely  Sometimes Very Often 
##   16.66667   20.83333   41.66667   20.83333
prop.table(table(survey$q20_1)) * 100
## 
##     Very Untrustworthy Somewhat Untrustworthy                Neutral 
##              16.666667              25.000000              33.333333 
##   Somewhat Trustworthy       Very Trustworthy 
##              16.666667               8.333333
prop.table(table(survey$q21)) * 100
## 
##                No      Yes 
## 17.24138 34.48276 48.27586

Visualizations

# Number of respondents in each experimental condition.
ggplot(survey, aes(x = random)) +
  geom_bar() +
  labs(title = "Count by Experimental Condition")

# Answer distribution for q19_1, split by condition.
# Respondents with no answer are removed first: discrete scales draw NA as
# its own bar by default, whereas the cross-tabs and tests leave those rows
# out. preserve = "single" keeps every bar the same width even when one
# condition has no answers in a category.
survey %>%
  filter(!is.na(q19_1)) %>%
  ggplot(aes(x = q19_1, fill = random)) +
  geom_bar(position = position_dodge(preserve = "single")) +
  labs(title = "Q19_1 by Group")

# Same chart for q20_1; the long level labels are rotated so they stay
# readable.
survey %>%
  filter(!is.na(q20_1)) %>%
  ggplot(aes(x = q20_1, fill = random)) +
  geom_bar(position = position_dodge(preserve = "single")) +
  labs(title = "Q20_1 by Group") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))

Cross Tabs

# Cross-tabulate experimental condition (rows) against one question column
# (columns). The column is looked up by name in `survey`.
crosstab_by_condition <- function(question) {
  table(survey$random, survey[[question]])
}

tab_q19 <- crosstab_by_condition("q19_1")
tab_q20 <- crosstab_by_condition("q20_1")
tab_q21 <- crosstab_by_condition("q21")

# Condition-by-q19_1 counts, then row proportions (margin 1: each
# condition's row sums to 1, so the rows are directly comparable).
tab_q19
##            
##             Never Rarely Sometimes Very Often
##   Control       3      1         7          4
##   Treatment     1      4         3          1
prop.table(tab_q19, 1)
##            
##                  Never     Rarely  Sometimes Very Often
##   Control   0.20000000 0.06666667 0.46666667 0.26666667
##   Treatment 0.11111111 0.44444444 0.33333333 0.11111111
# Condition-by-q20_1 counts and row proportions; the wide table is printed
# in two column groups.
tab_q20
##            
##             Very Untrustworthy Somewhat Untrustworthy Neutral
##   Control                    3                      3       5
##   Treatment                  1                      3       3
##            
##             Somewhat Trustworthy Very Trustworthy
##   Control                      2                2
##   Treatment                    2                0
prop.table(tab_q20, 1)
##            
##             Very Untrustworthy Somewhat Untrustworthy   Neutral
##   Control            0.2000000              0.2000000 0.3333333
##   Treatment          0.1111111              0.3333333 0.3333333
##            
##             Somewhat Trustworthy Very Trustworthy
##   Control              0.1333333        0.1333333
##   Treatment            0.2222222        0.0000000
# Condition-by-q21 counts and row proportions.
# NOTE(review): the unlabeled first column in the recorded output is the
# empty-string level of q21 (blank answers), so it is counted as a response
# category and is part of each row's denominator.
tab_q21
##            
##               No Yes
##   Control   2  7   8
##   Treatment 3  3   6
prop.table(tab_q21, 1)
##            
##                              No       Yes
##   Control   0.1176471 0.4117647 0.4705882
##   Treatment 0.2500000 0.2500000 0.5000000

Hypothesis Testing

# Pearson chi-squared tests of independence between condition and each
# question.
chisq.test(tab_q19)
## 
##  Pearson's Chi-squared test
## 
## data:  tab_q19
## X-squared = 5.0133, df = 3, p-value = 0.1708
chisq.test(tab_q20)
## 
##  Pearson's Chi-squared test
## 
## data:  tab_q20
## X-squared = 2.1333, df = 4, p-value = 0.7113
chisq.test(tab_q21)
## 
##  Pearson's Chi-squared test
## 
## data:  tab_q21
## X-squared = 1.2611, df = 2, p-value = 0.5323

# The tables are sparse: only a few dozen answers are spread over many cells,
# so most expected counts are below 5. In that situation the chi-squared
# approximation is unreliable and chisq.test() warns that it "may be
# incorrect". Fisher's exact test does not rely on that large-sample
# approximation, so its p-values should be reported alongside (or instead
# of) the ones above.
fisher.test(tab_q19)
fisher.test(tab_q20)
fisher.test(tab_q21)

Numeric Conversion + Means

# Numeric scores for the two ordered items. as.numeric() on an ordered factor
# returns the position of each level, so q19_1 runs from 1 (Never) to
# 4 (Very Often) and q20_1 from 1 (Very Untrustworthy) to
# 5 (Very Trustworthy); unanswered items stay NA.
survey_num <- survey
survey_num$q19_num <- as.numeric(survey$q19_1)
survey_num$q20_num <- as.numeric(survey$q20_1)

# Overall descriptive statistics for both scores (psych::describe).
describe(survey_num[, c("q19_num", "q20_num")])
##         vars  n mean   sd median trimmed  mad min max range  skew kurtosis   se
## q19_num    1 24 2.67 1.01      3     2.7 1.48   1   4     3 -0.32    -1.07 0.21
## q20_num    2 24 2.75 1.19      3     2.7 1.48   1   5     4  0.17    -0.90 0.24
# Mean score per experimental condition, ignoring missing answers.
summarise(
  group_by(survey_num, random),
  mean_q19 = mean(q19_num, na.rm = TRUE),
  mean_q20 = mean(q20_num, na.rm = TRUE)
)
## # A tibble: 2 × 3
##   random    mean_q19 mean_q20
##   <fct>        <dbl>    <dbl>
## 1 Control       2.8      2.8 
## 2 Treatment     2.44     2.67

T-Tests

# Two-sample t-tests comparing the numeric scores between conditions.
# t.test() defaults to the Welch version, which does not assume equal
# variances in the two groups.
with(survey_num, t.test(q19_num ~ random))
## 
##  Welch Two Sample t-test
## 
## data:  q19_num by random
## t = 0.87661, df = 19.767, p-value = 0.3912
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
##  -0.4911591  1.2022702
## sample estimates:
##   mean in group Control mean in group Treatment 
##                2.800000                2.444444
with(survey_num, t.test(q20_num ~ random))
## 
##  Welch Two Sample t-test
## 
## data:  q20_num by random
## t = 0.27966, df = 20.604, p-value = 0.7825
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
##  -0.8593067  1.1259734
## sample estimates:
##   mean in group Control mean in group Treatment 
##                2.800000                2.666667

Interpretation

# Print the placeholder prompt for the written interpretation. The pieces are
# joined with newlines, giving a leading blank line, the heading, a blank
# line, and the three bullet prompts.
cat(
  "",
  "Write your interpretation here:",
  "",
  "- Compare Control vs Treatment",
  "- Mention if results are statistically significant (p < 0.05)",
  "- Explain if hypothesis is supported or not",
  "",
  sep = "\n"
)

Write your interpretation here: