#Libraries and Packages
library(ggplot2)
library(dplyr)
library(stringr)
library(tidyr)
library(forcats)
library(nlme)
library(ggalluvial)
library(ordinal)
library(ggeffects)
library(ggplot2)
data=read.csv("deident_final_merged_data.csv")
data <- subset(data, !is.na(Treatment))
#Average score between Treatments, including Child.Course.ID as a random effect
#Control group is the reference by default
model <- lme(
Exam.Score ~ Treatment,
random = ~1 | Child.Course.ID,
data = data,
na.action = na.omit
)
anova(model)
summary(model)
Linear mixed-effects model fit by REML
Data: data
Random effects:
Formula: ~1 | Child.Course.ID
(Intercept) Residual
StdDev: 0.0003621712 10.50213
Fixed effects: Exam.Score ~ Treatment
Correlation:
(Intr) TrtmNA TrtmnQ
TreatmentNonsensical Answer -0.715
TreatmentQuestion -0.719 0.515
TreatmentValid Answer -0.711 0.509 0.512
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-4.0545487 -0.6853066 0.1350778 0.8014248 1.5770611
Number of Observations: 170
Number of Groups: 4
stai <- lme(
Post.STAI.score ~ Treatment,
random = ~1 | Pre.STAI.score,
data = data,
na.action = na.omit
)
anova(stai)
summary(stai)
Linear mixed-effects model fit by REML
Data: data
Random effects:
Formula: ~1 | Pre.STAI.score
(Intercept) Residual
StdDev: 9.405543 14.0588
Fixed effects: Post.STAI.score ~ Treatment
Correlation:
(Intr) TrtmNA TrtmnQ
TreatmentNonsensical Answer -0.572
TreatmentQuestion -0.551 0.542
TreatmentValid Answer -0.575 0.541 0.545
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.18541483 -0.50046122 0.03709113 0.68856269 1.99818647
Number of Observations: 126
Number of Groups: 18
clean_data <- data %>%
filter(
!is.na(Pre.STAI.Cat),
!is.na(Post.STAI.Cat),
trimws(Pre.STAI.Cat) != "",
trimws(Post.STAI.Cat) != "",
!grepl("^[0-9]+$", Pre.STAI.Cat),
!grepl("^[0-9]+$", Post.STAI.Cat)
)
flow_data <- clean_data %>%
count(Pre.STAI.Cat, Post.STAI.Cat) %>%
group_by(Pre.STAI.Cat) %>%
mutate(prop = n / sum(n))
ggplot(flow_data,
aes(axis1 = Pre.STAI.Cat,
axis2 = Post.STAI.Cat,
y = n)) +
geom_alluvium(aes(fill = Pre.STAI.Cat), width = 0.2) +
geom_stratum(width = 0.2, fill = "grey80", color = "black") +
geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
scale_x_discrete(limits = c("Pre", "Post"), expand = c(.1, .1)) +
scale_fill_brewer(palette = "Set2") +
labs(title = "Change in STAI Categories",
y = "Count") +
theme_minimal()
#give the proportion within each pre-category that is allocated to the post-category. So if the proportion is 0.70, then 70% went from high anxiety to high anxiety. And 18% went from high to moderate anxiety.
flow_data
#run an ordinal linear regression, just in case
# Make ordered factors (IMPORTANT)
data$Pre.STAI.Cat <- factor(data$Pre.STAI.Cat, ordered = TRUE)
data$Post.STAI.Cat <- factor(data$Post.STAI.Cat, ordered = TRUE)
model <- clmm(
Post.STAI.Cat ~ Treatment + (1 | Pre.STAI.Cat),
data = data
)
summary(model)
Cumulative Link Mixed Model fitted with the Laplace approximation
formula: Post.STAI.Cat ~ Treatment + (1 | Pre.STAI.Cat)
data: data
Random effects:
Groups Name Variance Std.Dev.
Pre.STAI.Cat (Intercept) 3.334e-09 5.774e-05
Number of groups: Pre.STAI.Cat 4
Coefficients:
Estimate Std. Error z value Pr(>|z|)
TreatmentNonsensical Answer 0.1150 0.4462 0.258 0.797
TreatmentQuestion -0.4181 0.4607 -0.908 0.364
TreatmentValid Answer 0.2991 0.4576 0.654 0.513
Threshold coefficients:
Estimate Std. Error z value
|2 -2.2369 0.4001 -5.590
2|High Anxiety -2.1647 0.3950 -5.481
High Anxiety|Moderate Anxiety 0.7114 0.3430 2.074
Moderate Anxiety|No or Low Anxiety 1.7379 0.3736 4.652
(19 observations deleted due to missingness)
model <- lme(
Post.STAI.score ~ Treatment * Exam.Score,
random = list(
Pre.STAI.Cat = ~1,
Child.Course.ID = ~1
),
data = data,
na.action = na.omit
)
anova(model)
summary(model)
Linear mixed-effects model fit by REML
Data: data
Random effects:
Formula: ~1 | Pre.STAI.Cat
(Intercept)
StdDev: 3.701099
Formula: ~1 | Child.Course.ID %in% Pre.STAI.Cat
(Intercept) Residual
StdDev: 1.6521 15.00977
Fixed effects: Post.STAI.score ~ Treatment * Exam.Score
Correlation:
(Intr) TrtmNA TrtmnQ TrtmVA Exm.Sc TNA:E. TQ:E.S
TreatmentNonsensical Answer -0.777
TreatmentQuestion -0.707 0.566
TreatmentValid Answer -0.703 0.562 0.510
Exam.Score -0.964 0.770 0.703 0.697
TreatmentNonsensical Answer:Exam.Score 0.769 -0.974 -0.562 -0.557 -0.799
TreatmentQuestion:Exam.Score 0.695 -0.558 -0.978 -0.501 -0.723 0.579
TreatmentValid Answer:Exam.Score 0.688 -0.553 -0.501 -0.977 -0.715 0.574 0.516
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.30294310 -0.55813099 -0.04116798 0.66455128 2.08483132
Number of Observations: 139
Number of Groups:
Pre.STAI.Cat Child.Course.ID %in% Pre.STAI.Cat
4 16
pred <- ggpredict(model, terms = c("Exam.Score", "Treatment"))
ggplot(pred, aes(x = x, y = predicted, color = group, fill = group)) +
geom_line(linewidth = 1.2) +
geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2, color = NA) +
labs(
x = "Exam Score",
y = "Predicted Post STAI Score",
color = "Treatment",
fill = "Treatment"
) +
theme_classic()
library(tidyr)
library(dplyr)
pre <- data %>%
filter(
!is.na(Treatment),
!is.na(Pre.STAI.Cat),
trimws(Treatment) != "",
trimws(Pre.STAI.Cat) != "",
tolower(trimws(Pre.STAI.Cat)) != "missing",
!grepl("^\\d+$", trimws(Pre.STAI.Cat)) # removes numeric-only categories
) %>%
count(Treatment, Category = Pre.STAI.Cat) %>%
mutate(Time = "Pre")
post <- data %>%
filter(
!is.na(Treatment),
!is.na(Post.STAI.Cat),
trimws(Treatment) != "",
trimws(Post.STAI.Cat) != "",
tolower(trimws(Post.STAI.Cat)) != "missing",
!grepl("^\\d+$", trimws(Post.STAI.Cat)) # removes numeric-only categories
) %>%
count(Treatment, Category = Post.STAI.Cat) %>%
mutate(Time = "Post")
ggplot(data, aes(x = Exam.Score)) +
geom_histogram() +
theme_classic() +
labs(x = "Exam Score", y = "Count")
Less.Anxious Distracted Easy.to.understand Notice Less.stress.intim Interfered.Seriousness
allowed_levels <- c(
"Strongly disagree",
"Somewhat disagree",
"Neither agree nor disagree",
"Somewhat agree",
"Strongly agree"
)
likert_cols <- c("Less.Anxious",
"Distracted",
"Easy.to.understand",
"Notice",
"Less.stress.intim",
"Interfered.Seriousness")
long_data <- data %>%
select(all_of(c("Treatment", likert_cols))) %>%
pivot_longer(
cols = all_of(likert_cols),
names_to = "Question",
values_to = "Response"
)
long_data <- long_data %>%
mutate(
Response = factor(Response, levels = allowed_levels, ordered = TRUE),
Treatment = factor(Treatment)
)
summary_data <- long_data %>%
group_by(Question, Treatment, Response) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(Question, Treatment) %>%
mutate(prop = n / sum(n))
ggplot(summary_data, aes(x = Treatment, y = prop, fill = Response)) +
geom_col(position = "fill", color = "black", size = 0.2) + # stacked proportional bars
facet_wrap(~ Question, ncol = 2) +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_brewer(palette = "RdYlBu", direction = -1) + # reversed for agreement going blue
labs(x = "Treatment", y = "Percentage", fill = "Likert Response",
title = "Likert Responses by Treatment and Question") +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)
NA
NA
# Step 1: Define Likert levels and question columns
allowed_levels <- c(
"Strongly disagree",
"Somewhat disagree",
"Neither agree nor disagree",
"Somewhat agree",
"Strongly agree"
)
likert_cols <- c(
"Less.Anxious",
"Distracted",
"Easy.to.understand",
"Notice",
"Less.stress.intim",
"Interfered.Seriousness"
)
# Step 2: Filter invalid responses
clean_data <- data %>%
filter(
if_all(all_of(likert_cols), ~ .x %in% allowed_levels),
!is.na(Treatment),
Treatment != "Control" # Remove Control treatment
)
# Step 3: Reshape to long format
long_data <- clean_data %>%
select(Treatment, all_of(likert_cols)) %>%
pivot_longer(
cols = all_of(likert_cols),
names_to = "Question",
values_to = "Response"
)
# Step 4: Factor levels
long_data <- long_data %>%
mutate(
Response = factor(Response, levels = allowed_levels, ordered = TRUE),
Treatment = factor(Treatment)
)
# Step 5: Calculate proportions
summary_data <- long_data %>%
group_by(Question, Treatment, Response) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(Question, Treatment) %>%
mutate(prop = n / sum(n))
# Step 6: Plot with labels inside bars
ggplot(summary_data, aes(x = Treatment, y = prop, fill = Response)) +
geom_col(position = "fill", color = "black", size = 0.2) +
# Add percentage labels only if >5% to avoid clutter
geom_text(
aes(label = ifelse(prop > 0.05, scales::percent(prop, accuracy = 1), "")),
position = position_fill(vjust = 0.5),
size = 3,
color = "black"
) +
facet_wrap(~ Question, ncol = 2) +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_brewer(palette = "RdYlBu", direction = -1) +
labs(
title = "Likert Responses by Treatment and Question",
x = "Treatment",
y = "Percentage",
fill = "Likert Response"
) +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)
question_labels <- c(
"Less.Anxious" = "The humorous items helped me feel less anxious during the exam.",
"Distracted" = "The humorous items distracted me from concentrating on the questions.",
"Easy.to.understand" = "I found the humor in the test items easy to understand and appropriate for the class.",
"Notice" = "I noticed that some questions had humorous wording or answer choices.",
"Less.stress.intim" = "The humorous items made the exam feel less intimidating or stressful.",
"Interfered.Seriousness" = "I felt that the humor interfered with how seriously I approached the test."
)
ggplot(summary_data, aes(x = Treatment, y = prop, fill = Response)) +
geom_col(position = "fill", color = "black", size = 0.2) +
geom_text(
aes(label = ifelse(prop > 0.05, scales::percent(prop, accuracy = 1), "")),
position = position_fill(vjust = 0.5),
size = 3,
color = "black"
) +
facet_wrap(~ Question, ncol = 2, labeller = labeller(Question = question_labels)) +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_brewer(palette = "RdYlBu", direction = -1) +
labs(
title = "Likert Responses by Treatment and Question",
x = "Treatment",
y = "Percentage",
fill = "Likert Response"
) +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)
NA
NA