#Libraries and Packages
library(ggplot2)
library(dplyr)
library(stringr)
library(tidyr)
library(forcats)
library(nlme)
library(ggalluvial)
library(ordinal)
data=read.csv("deident_final_merged_data.csv")
#Average score between Treatments, including Child.Course.ID as a random effect
#Control group is the reference by default
model <- lme(
Exam.Score ~ Treatment,
random = ~1 | Child.Course.ID,
data = data,
na.action = na.omit
)
anova(model)
## numDF denDF F-value p-value
## (Intercept) 1 163 3330.411 <.0001
## Treatment 3 163 0.253 0.8594
summary(model)
## Linear mixed-effects model fit by REML
## Data: data
## AIC BIC logLik
## 1278.808 1297.48 -633.4041
##
## Random effects:
## Formula: ~1 | Child.Course.ID
## (Intercept) Residual
## StdDev: 0.0003621712 10.50213
##
## Fixed effects: Exam.Score ~ Treatment
## Value Std.Error DF t-value p-value
## (Intercept) 47.40244 1.640157 163 28.901158 0.0000
## TreatmentNonsensical Answer -0.82104 2.292402 163 -0.358159 0.7207
## TreatmentQuestion -1.96494 2.279652 163 -0.861947 0.3900
## TreatmentValid Answer -0.81911 2.305684 163 -0.355255 0.7229
## Correlation:
## (Intr) TrtmNA TrtmnQ
## TreatmentNonsensical Answer -0.715
## TreatmentQuestion -0.719 0.515
## TreatmentValid Answer -0.711 0.509 0.512
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -4.0545487 -0.6853066 0.1350778 0.8014248 1.5770611
##
## Number of Observations: 170
## Number of Groups: 4
stai <- lme(
Post.STAI.score ~ Treatment,
random = ~1 | Pre.STAI.score,
data = data,
na.action = na.omit
)
anova(stai)
## numDF denDF F-value p-value
## (Intercept) 1 105 314.88311 <.0001
## Treatment 3 105 0.25178 0.8599
summary(stai)
## Linear mixed-effects model fit by REML
## Data: data
## AIC BIC logLik
## 1037.966 1054.79 -512.9828
##
## Random effects:
## Formula: ~1 | Pre.STAI.score
## (Intercept) Residual
## StdDev: 9.405543 14.0588
##
## Fixed effects: Post.STAI.score ~ Treatment
## Value Std.Error DF t-value p-value
## (Intercept) 48.62837 3.680402 105 13.212787 0.0000
## TreatmentNonsensical Answer -2.51023 3.847176 105 -0.652487 0.5155
## TreatmentQuestion 0.47968 3.816420 105 0.125687 0.9002
## TreatmentValid Answer -0.50500 3.897984 105 -0.129553 0.8972
## Correlation:
## (Intr) TrtmNA TrtmnQ
## TreatmentNonsensical Answer -0.572
## TreatmentQuestion -0.551 0.542
## TreatmentValid Answer -0.575 0.541 0.545
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -3.18541483 -0.50046122 0.03709113 0.68856269 1.99818647
##
## Number of Observations: 126
## Number of Groups: 18
clean_data <- data %>%
filter(
!is.na(Pre.STAI.Cat),
!is.na(Post.STAI.Cat),
trimws(Pre.STAI.Cat) != "",
trimws(Post.STAI.Cat) != "",
!grepl("^[0-9]+$", Pre.STAI.Cat),
!grepl("^[0-9]+$", Post.STAI.Cat)
)
flow_data <- clean_data %>%
count(Pre.STAI.Cat, Post.STAI.Cat) %>%
group_by(Pre.STAI.Cat) %>%
mutate(prop = n / sum(n))
ggplot(flow_data,
aes(axis1 = Pre.STAI.Cat,
axis2 = Post.STAI.Cat,
y = n)) +
geom_alluvium(aes(fill = Pre.STAI.Cat), width = 0.2) +
geom_stratum(width = 0.2, fill = "grey80", color = "black") +
geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
scale_x_discrete(limits = c("Pre", "Post"), expand = c(.1, .1)) +
scale_fill_brewer(palette = "Set2") +
labs(title = "Change in STAI Categories",
y = "Count") +
theme_minimal()
#give the proportion within each pre-category that is allocated to the post-category. So if the proportion is 0.70, then 70% went from high anxiety to high anxiety. And 18% went from high to moderate anxiety.
flow_data
## # A tibble: 9 × 4
## # Groups: Pre.STAI.Cat [3]
## Pre.STAI.Cat Post.STAI.Cat n prop
## <chr> <chr> <int> <dbl>
## 1 High Anxiety High Anxiety 62 0.705
## 2 High Anxiety Moderate Anxiety 16 0.182
## 3 High Anxiety No or Low Anxiety 10 0.114
## 4 Moderate Anxiety High Anxiety 10 0.455
## 5 Moderate Anxiety Moderate Anxiety 6 0.273
## 6 Moderate Anxiety No or Low Anxiety 6 0.273
## 7 No or Low Anxiety High Anxiety 6 0.375
## 8 No or Low Anxiety Moderate Anxiety 3 0.188
## 9 No or Low Anxiety No or Low Anxiety 7 0.438
#run an ordinal linear regression, just in case
# Make ordered factors (IMPORTANT)
data$Pre.STAI.Cat <- factor(data$Pre.STAI.Cat, ordered = TRUE)
data$Post.STAI.Cat <- factor(data$Post.STAI.Cat, ordered = TRUE)
model <- clmm(
Post.STAI.Cat ~ Treatment + (1 | Pre.STAI.Cat),
data = data
)
summary(model)
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: Post.STAI.Cat ~ Treatment + (1 | Pre.STAI.Cat)
## data: data
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 151 -176.86 369.71 646(771) 8.48e-06 1.2e+07
##
## Random effects:
## Groups Name Variance Std.Dev.
## Pre.STAI.Cat (Intercept) 3.334e-09 5.774e-05
## Number of groups: Pre.STAI.Cat 4
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## TreatmentNonsensical Answer 0.1150 0.4462 0.258 0.797
## TreatmentQuestion -0.4181 0.4607 -0.908 0.364
## TreatmentValid Answer 0.2991 0.4576 0.654 0.513
##
## Threshold coefficients:
## Estimate Std. Error z value
## |2 -2.2369 0.4001 -5.590
## 2|High Anxiety -2.1647 0.3950 -5.481
## High Anxiety|Moderate Anxiety 0.7114 0.3430 2.074
## Moderate Anxiety|No or Low Anxiety 1.7379 0.3736 4.652
## (27 observations deleted due to missingness)
Less.Anxious Distracted Easy.to.understand Notice Less.stress.intim Interfered.Seriousness
allowed_levels <- c(
"Strongly disagree",
"Somewhat disagree",
"Neither agree nor disagree",
"Somewhat agree",
"Strongly agree"
)
likert_cols <- c("Less.Anxious",
"Distracted",
"Easy.to.understand",
"Notice",
"Less.stress.intim",
"Interfered.Seriousness")
long_data <- data %>%
select(all_of(c("Treatment", likert_cols))) %>%
pivot_longer(
cols = all_of(likert_cols),
names_to = "Question",
values_to = "Response"
)
long_data <- long_data %>%
mutate(
Response = factor(Response, levels = allowed_levels, ordered = TRUE),
Treatment = factor(Treatment)
)
summary_data <- long_data %>%
group_by(Question, Treatment, Response) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(Question, Treatment) %>%
mutate(prop = n / sum(n))
ggplot(summary_data, aes(x = Treatment, y = prop, fill = Response)) +
geom_col(position = "fill", color = "black", size = 0.2) + # stacked proportional bars
facet_wrap(~ Question, ncol = 2) +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_brewer(palette = "RdYlBu", direction = -1) + # reversed for agreement going blue
labs(x = "Treatment", y = "Percentage", fill = "Likert Response",
title = "Likert Responses by Treatment and Question") +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)
# Step 1: Define Likert levels and question columns
allowed_levels <- c(
"Strongly disagree",
"Somewhat disagree",
"Neither agree nor disagree",
"Somewhat agree",
"Strongly agree"
)
likert_cols <- c(
"Less.Anxious",
"Distracted",
"Easy.to.understand",
"Notice",
"Less.stress.intim",
"Interfered.Seriousness"
)
# Step 2: Filter invalid responses
clean_data <- data %>%
filter(
if_all(all_of(likert_cols), ~ .x %in% allowed_levels),
!is.na(Treatment),
Treatment != "Control" # Remove Control treatment
)
# Step 3: Reshape to long format
long_data <- clean_data %>%
select(Treatment, all_of(likert_cols)) %>%
pivot_longer(
cols = all_of(likert_cols),
names_to = "Question",
values_to = "Response"
)
# Step 4: Factor levels
long_data <- long_data %>%
mutate(
Response = factor(Response, levels = allowed_levels, ordered = TRUE),
Treatment = factor(Treatment)
)
# Step 5: Calculate proportions
summary_data <- long_data %>%
group_by(Question, Treatment, Response) %>%
summarise(n = n(), .groups = "drop") %>%
group_by(Question, Treatment) %>%
mutate(prop = n / sum(n))
# Step 6: Plot with labels inside bars
ggplot(summary_data, aes(x = Treatment, y = prop, fill = Response)) +
geom_col(position = "fill", color = "black", size = 0.2) +
# Add percentage labels only if >5% to avoid clutter
geom_text(
aes(label = ifelse(prop > 0.05, scales::percent(prop, accuracy = 1), "")),
position = position_fill(vjust = 0.5),
size = 3,
color = "black"
) +
facet_wrap(~ Question, ncol = 2) +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_brewer(palette = "RdYlBu", direction = -1) +
labs(
title = "Likert Responses by Treatment and Question",
x = "Treatment",
y = "Percentage",
fill = "Likert Response"
) +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)
question_labels <- c(
"Less.Anxious" = "The humorous items helped me feel less anxious during the exam.",
"Distracted" = "The humorous items distracted me from concentrating on the questions.",
"Easy.to.understand" = "I found the humor in the test items easy to understand and appropriate for the class.",
"Notice" = "I noticed that some questions had humorous wording or answer choices.",
"Less.stress.intim" = "The humorous items made the exam feel less intimidating or stressful.",
"Interfered.Seriousness" = "I felt that the humor interfered with how seriously I approached the test."
)
ggplot(summary_data, aes(x = Treatment, y = prop, fill = Response)) +
geom_col(position = "fill", color = "black", size = 0.2) +
geom_text(
aes(label = ifelse(prop > 0.05, scales::percent(prop, accuracy = 1), "")),
position = position_fill(vjust = 0.5),
size = 3,
color = "black"
) +
facet_wrap(~ Question, ncol = 2, labeller = labeller(Question = question_labels)) +
scale_y_continuous(labels = scales::percent_format()) +
scale_fill_brewer(palette = "RdYlBu", direction = -1) +
labs(
title = "Likert Responses by Treatment and Question",
x = "Treatment",
y = "Percentage",
fill = "Likert Response"
) +
theme_minimal(base_size = 14) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)