library(tidyverse)
library(janitor)
library(knitr)
library(psych)
# Read the cleaned survey export, keeping text columns as character vectors
# rather than factors, then print a compact per-column overview.
data <- utils::read.csv(
  file = "cleaned_survey_data.csv",
  stringsAsFactors = FALSE
)
dplyr::glimpse(data)
## Rows: 34
## Columns: 20
## $ StartDate <chr> "2026-03-11 18:04:45", "2026-03-16 17:09:27", "2…
## $ EndDate <chr> "2026-03-11 18:05:08", "2026-03-16 17:10:12", "2…
## $ Status <chr> "Survey Preview", "IP Address", "IP Address", "I…
## $ IPAddress <chr> "", "136.168.93.87", "136.168.93.87", "136.168.9…
## $ Progress <int> 100, 100, 100, 100, 100, 100, 100, 100, 14, 14, …
## $ Duration..in.seconds. <int> 22, 44, 100, 165, 280, 11, 47, 216, 8, 9, 93, 69…
## $ Finished <chr> "True", "True", "True", "True", "True", "True", …
## $ RecordedDate <chr> "2026-03-11 18:05:08", "2026-03-16 17:10:12", "2…
## $ ResponseId <chr> "R_6tMzrMDtJIunazx", "R_6FT9SwEumVWfibn", "R_7r0…
## $ LocationLatitude <dbl> 35.3288, 35.3407, 35.3407, 35.3407, 35.3407, 35.…
## $ LocationLongitude <dbl> -118.9748, -119.0596, -119.0596, -119.0596, -119…
## $ DistributionChannel <chr> "preview", "anonymous", "anonymous", "anonymous"…
## $ UserLanguage <chr> "EN", "EN", "EN", "EN", "EN", "EN", "EN", "EN", …
## $ Q1_1 <chr> "Yes", "", "", "", "", "", "Yes", "Yes", "Yes", …
## $ Q2_1 <chr> "", "", "", "", "", "", "Multiple Times Per Day"…
## $ Q19_1 <chr> "", "", "", "", "", "", "Very Often", "Rarely", …
## $ Q20_1 <chr> "", "", "", "", "", "", "Neutral", "Somewhat Unt…
## $ Q21 <chr> "", "", "", "", "", "", "No", "Yes", "", "", "No…
## $ Q4 <chr> "", "", "", "", "", "", "None influence me", "Re…
## $ random <dbl> 2, 1, 1, 2, 2, 2, 1, 2, NA, NA, 2, 2, NA, NA, 2,…
# Standardise column names to snake_case, then coerce the completion flags:
#   * finished -> logical. Common spellings of true/false (and 1/0) are mapped
#     explicitly; anything else falls through to as.logical(), which yields NA
#     for values it does not recognise.
#   * progress -> numeric. Values that cannot be parsed become NA and the
#     coercion warning is deliberately silenced.
data <- mutate(
  clean_names(data),
  finished = case_when(
    finished %in% c("TRUE", "True", "true", "1") ~ TRUE,
    finished %in% c("FALSE", "False", "false", "0") ~ FALSE,
    TRUE ~ as.logical(finished)
  ),
  progress = suppressWarnings(as.numeric(progress))
)
# Analysis sample: responses flagged as finished, with 100% progress, that
# were not recorded as a survey preview. Rows where any condition is NA are
# dropped by filter().
survey <- filter(
  data,
  finished,
  progress == 100,
  status != "Survey Preview"
)
# Recode the analysis variables as factors.
survey <- mutate(
  survey,
  # Condition codes 1 / 2 become the labelled groups Control / Treatment.
  random = factor(
    random,
    levels = c("1", "2"),
    labels = c("Control", "Treatment")
  ),
  # Ordered scales: any value not listed in `levels` (including the empty
  # string used for unanswered items) becomes NA.
  q19_1 = ordered(
    q19_1,
    levels = c("Never", "Rarely", "Sometimes", "Very Often")
  ),
  q20_1 = ordered(
    q20_1,
    levels = c(
      "Very Untrustworthy",
      "Somewhat Untrustworthy",
      "Neutral",
      "Somewhat Trustworthy",
      "Very Trustworthy"
    )
  ),
  # Nominal items: levels are taken from the observed values, so unanswered
  # items stay in the data as an empty-string ("") level rather than NA.
  across(c(q21, q2_1, q4), factor)
)
str(survey)
## 'data.frame': 29 obs. of 20 variables:
## $ start_date : chr "2026-03-16 17:09:27" "2026-03-16 17:16:06" "2026-03-16 17:28:23" "2026-03-16 17:31:19" ...
## $ end_date : chr "2026-03-16 17:10:12" "2026-03-16 17:17:46" "2026-03-16 17:31:09" "2026-03-16 17:36:00" ...
## $ status : chr "IP Address" "IP Address" "IP Address" "IP Address" ...
## $ ip_address : chr "136.168.93.87" "136.168.93.87" "136.168.93.87" "136.168.93.87" ...
## $ progress : num 100 100 100 100 100 100 100 100 100 100 ...
## $ duration_in_seconds : int 44 100 165 280 11 47 216 93 69 109 ...
## $ finished : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ recorded_date : chr "2026-03-16 17:10:12" "2026-03-16 17:17:47" "2026-03-16 17:31:09" "2026-03-16 17:36:00" ...
## $ response_id : chr "R_6FT9SwEumVWfibn" "R_7r0OZWUHhBCm2Mm" "R_66K7tmLVI2vAjlv" "R_5MDmkmjK7azmwwk" ...
## $ location_latitude : num 35.3 35.3 35.3 35.3 35.3 ...
## $ location_longitude : num -119 -119 -119 -119 -119 ...
## $ distribution_channel: chr "anonymous" "anonymous" "anonymous" "anonymous" ...
## $ user_language : chr "EN" "EN" "EN" "EN" ...
## $ q1_1 : chr "" "" "" "" ...
## $ q2_1 : Factor w/ 4 levels "","Few Times Per Week",..: 1 1 1 1 1 3 4 3 4 3 ...
## $ q19_1 : Ord.factor w/ 4 levels "Never"<"Rarely"<..: NA NA NA NA NA 4 2 3 2 3 ...
## $ q20_1 : Ord.factor w/ 5 levels "Very Untrustworthy"<..: NA NA NA NA NA 3 2 1 2 3 ...
## $ q21 : Factor w/ 3 levels "","No","Yes": 1 1 1 1 1 2 3 2 3 3 ...
## $ q4 : Factor w/ 7 levels "","Influencer promotions",..: 1 1 1 1 1 3 4 4 7 7 ...
## $ random : Factor w/ 2 levels "Control","Treatment": 1 1 2 2 2 1 2 2 2 2 ...
# One-way frequency tables. In the q2_1, q21 and q4 tables the unlabelled
# first column/row is the empty-string level, i.e. respondents who left the
# item unanswered.

# Group sizes and shares by experimental condition.
table(survey[["random"]])
##
## Control Treatment
## 17 12
prop.table(table(survey[["random"]]))
##
## Control Treatment
## 0.5862069 0.4137931

# Counts for each survey item.
table(survey[["q2_1"]])
##
## Few Times Per Week Multiple Times Per Day
## 5 1 21
## Rarely
## 2
table(survey[["q19_1"]])
##
## Never Rarely Sometimes Very Often
## 4 5 10 5
table(survey[["q20_1"]])
##
## Very Untrustworthy Somewhat Untrustworthy Neutral
## 4 6 8
## Somewhat Trustworthy Very Trustworthy
## 4 2
table(survey[["q21"]])
##
## No Yes
## 5 10 14
table(survey[["q4"]])
##
##
## 5
## Influencer promotions
## 9
## None influence me
## 3
## Retargeted ads (ads based on previous searches)
## 4
## Sponsored posts from brands
## 1
## Story ads
## 1
## Video ads
## 6

# Percentages. q19_1 and q20_1 exclude NA, so their base is the answered
# responses; q21 still counts the empty-string level in its base.
100 * prop.table(table(survey[["q19_1"]]))
##
## Never Rarely Sometimes Very Often
## 16.66667 20.83333 41.66667 20.83333
100 * prop.table(table(survey[["q20_1"]]))
##
## Very Untrustworthy Somewhat Untrustworthy Neutral
## 16.666667 25.000000 33.333333
## Somewhat Trustworthy Very Trustworthy
## 16.666667 8.333333
100 * prop.table(table(survey[["q21"]]))
##
## No Yes
## 17.24138 34.48276 48.27586
# Bar chart of the number of respondents in each experimental condition.
ggplot(data = survey, mapping = aes(x = random)) +
  geom_bar() +
  ggtitle("Count by Experimental Condition")

# Response distribution for q19_1, with one bar per condition side by side.
ggplot(data = survey, mapping = aes(x = q19_1, fill = random)) +
  geom_bar(position = position_dodge()) +
  ggtitle("Q19_1 by Group")

# Same layout for q20_1; the x labels are rotated so the long category names
# do not overlap.
ggplot(data = survey, mapping = aes(x = q20_1, fill = random)) +
  geom_bar(position = position_dodge()) +
  ggtitle("Q20_1 by Group") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1))
# Cross-tabulate condition (rows) against each outcome (columns), then test
# each table for independence.
tab_q19 <- table(survey$random, survey$q19_1)
tab_q20 <- table(survey$random, survey$q20_1)
# q21 stores unanswered items as "" rather than NA, so a plain table() would
# count non-response as a third answer category and the chi-squared test would
# compare three "answers" (df = 2). Keep only the answered rows and rebuild the
# factor so the empty level is dropped, matching how q19_1 / q20_1 already
# exclude the same respondents as NA.
q21_answered <- !is.na(survey$q21) & as.character(survey$q21) != ""
tab_q21 <- table(
  survey$random[q21_answered],
  factor(survey$q21[q21_answered])
)
tab_q19
##
## Never Rarely Sometimes Very Often
## Control 3 1 7 4
## Treatment 1 4 3 1
prop.table(tab_q19, 1)
##
## Never Rarely Sometimes Very Often
## Control 0.20000000 0.06666667 0.46666667 0.26666667
## Treatment 0.11111111 0.44444444 0.33333333 0.11111111
tab_q20
##
## Very Untrustworthy Somewhat Untrustworthy Neutral
## Control 3 3 5
## Treatment 1 3 3
##
## Somewhat Trustworthy Very Trustworthy
## Control 2 2
## Treatment 2 0
prop.table(tab_q20, 1)
##
## Very Untrustworthy Somewhat Untrustworthy Neutral
## Control 0.2000000 0.2000000 0.3333333
## Treatment 0.1111111 0.3333333 0.3333333
##
## Somewhat Trustworthy Very Trustworthy
## Control 0.1333333 0.1333333
## Treatment 0.2222222 0.0000000
tab_q21
##
## No Yes
## Control 7 8
## Treatment 3 6
prop.table(tab_q21, 1)
##
## No Yes
## Control 0.4666667 0.5333333
## Treatment 0.3333333 0.6666667
# NOTE(review): with 24 answered responses most expected cell counts are below
# 5 (e.g. Treatment / Never in tab_q19: 9 * 4 / 24 = 1.5), so the chi-squared
# approximation is unreliable. Consider fisher.test() or
# chisq.test(..., simulate.p.value = TRUE) before reporting these p-values.
chisq.test(tab_q19)
##
## Pearson's Chi-squared test
##
## data: tab_q19
## X-squared = 5.0133, df = 3, p-value = 0.1708
chisq.test(tab_q20)
##
## Pearson's Chi-squared test
##
## data: tab_q20
## X-squared = 2.1333, df = 4, p-value = 0.7113
# tab_q21 is now 2 x 2, so chisq.test() applies Yates' continuity correction
# by default.
chisq.test(tab_q21)
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: tab_q21
## X-squared = 0.045714, df = 1, p-value = 0.8307
# Numeric scores for the two ordered scales. as.numeric() on an ordered factor
# returns the level position, so 1 is the lowest category ("Never" /
# "Very Untrustworthy") and unanswered items stay NA.
survey_num <- survey
survey_num$q19_num <- as.numeric(survey_num$q19_1)
survey_num$q20_num <- as.numeric(survey_num$q20_1)

# Overall descriptive statistics for both scores.
describe(survey_num[c("q19_num", "q20_num")])
## vars n mean sd median trimmed mad min max range skew kurtosis se
## q19_num 1 24 2.67 1.01 3 2.7 1.48 1 4 3 -0.32 -1.07 0.21
## q20_num 2 24 2.75 1.19 3 2.7 1.48 1 5 4 0.17 -0.90 0.24

# Mean score per experimental condition, ignoring unanswered items.
summarise(
  group_by(survey_num, random),
  mean_q19 = mean(q19_num, na.rm = TRUE),
  mean_q20 = mean(q20_num, na.rm = TRUE)
)
## # A tibble: 2 × 3
## random mean_q19 mean_q20
## <fct> <dbl> <dbl>
## 1 Control 2.8 2.8
## 2 Treatment 2.44 2.67
# Welch two-sample t-tests comparing Control and Treatment on each numeric
# score. The scores are level positions of ordered categories, so these tests
# treat the categories as equally spaced.
t.test(formula = q19_num ~ random, data = survey_num)
##
## Welch Two Sample t-test
##
## data: q19_num by random
## t = 0.87661, df = 19.767, p-value = 0.3912
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## -0.4911591 1.2022702
## sample estimates:
## mean in group Control mean in group Treatment
## 2.800000 2.444444
t.test(formula = q20_num ~ random, data = survey_num)
##
## Welch Two Sample t-test
##
## data: q20_num by random
## t = 0.27966, df = 20.604, p-value = 0.7825
## alternative hypothesis: true difference in means between group Control and group Treatment is not equal to 0
## 95 percent confidence interval:
## -0.8593067 1.1259734
## sample estimates:
## mean in group Control mean in group Treatment
## 2.800000 2.666667
# Placeholder checklist for the written interpretation of the results above;
# printed as-is into the report.
interpretation_prompt <- "
Write your interpretation here:
- Compare Control vs Treatment
- Mention if results are statistically significant (p < 0.05)
- Explain if hypothesis is supported or not
"
cat(interpretation_prompt)
Write your interpretation here: