Loading Libraries
library(readxl)
library(ggpubr)
## Loading required package: ggplot2
library(effectsize)
library(rstatix)
##
## Attaching package: 'rstatix'
## The following objects are masked from 'package:effectsize':
##
## cohens_d, eta_squared
## The following object is masked from 'package:stats':
##
## filter
Research Question: Does a six-week physical activity program improve
student stress levels?
Loading Dataset
DatasetRQ4 <- read_excel("DatasetRQ4.xlsx")
Checking Data and Dataset Structure
head(DatasetRQ4)
## # A tibble: 6 × 3
## Student_ID Stress_Pre Stress_Post
## <dbl> <dbl> <dbl>
## 1 1 53.5 45.5
## 2 2 37.4 33.9
## 3 3 35.8 9.49
## 4 4 89.0 82.8
## 5 5 30.5 26.8
## 6 6 42.5 26.9
str(DatasetRQ4)
## tibble [35 × 3] (S3: tbl_df/tbl/data.frame)
## $ Student_ID : num [1:35] 1 2 3 4 5 6 7 8 9 10 ...
## $ Stress_Pre : num [1:35] 53.5 37.4 35.8 89 30.5 ...
## $ Stress_Post: num [1:35] 45.48 33.92 9.49 82.77 26.82 ...
Seperate the Data by Condition
Stress_Pre <- DatasetRQ4$Stress_Pre
Stress_Post <- DatasetRQ4$Stress_Post
Differences <- Stress_Post - Stress_Pre
print(Differences)
## [1] -8.044361120 -3.529426078 -26.350753567 -6.276567859 -3.701374633
## [6] -15.552084134 -4.356073741 -3.647770620 -0.008788193 -5.302543769
## [11] -8.806996646 -3.200139192 -1.385585292 -19.179181634 3.899627158
## [16] -6.612429249 -2.533382079 -1.317457905 -0.782853924 -32.441286182
## [21] -4.132848699 -6.536275900 -6.896524983 -5.238701383 -36.697172771
## [26] -28.957287115 -9.817187881 -10.508880209 -19.386322836 -10.616491219
## [31] -13.700632408 -7.433337532 -20.087719095 -7.326665368 -15.099150128
Calculate descriptive statistics for each group
cat("Pre-Test Mean: ", mean(Stress_Pre, na.rm = TRUE), "\n")
## Pre-Test Mean: 51.53601
cat("Pre-Test Median: ", median(Stress_Pre, na.rm = TRUE), "\n")
## Pre-Test Median: 47.24008
cat("Pre-Test SD: ", sd(Stress_Pre, na.rm = TRUE), "\n\n")
## Pre-Test SD: 17.21906
cat("Post-Test Mean: ", mean(Stress_Post, na.rm = TRUE), "\n")
## Post-Test Mean: 41.4913
cat("Post-Test Median: ", median(Stress_Post, na.rm = TRUE), "\n")
## Post-Test Median: 40.84836
cat("Post-Test SD: ", sd(Stress_Post, na.rm = TRUE), "\n")
## Post-Test SD: 18.88901
Check normality
Method 1: Histograms
hist(Differences,
main = "Histogram of Difference Scores (Post - Pre)",
xlab = "Change in Stress Levels",
ylab = "Frequency",
col = "purple",
border = "darkblue",
breaks = 15)

cat("Skewness: Negatively skewed ,", "Kurtosis: Bell Curve")
## Skewness: Negatively skewed , Kurtosis: Bell Curve
print("As the data is negatively skewed also the shape is somehow resembling a bell curve")
## [1] "As the data is negatively skewed also the shape is somehow resembling a bell curve"
print("Based on Reports we cannot use the Dependent T-test")
## [1] "Based on Reports we cannot use the Dependent T-test"
Method 2: Boxplots
boxplot(Differences,
main = "Distribution of Score Differences (Post Stress - Pre Stress)",
ylab = "Change in Stress Levels",
col = "lightblue",
border = "darkblue",
horizontal = FALSE)

print("The box plot appears to be abnormal as two outliers are out of the whiskers. which makes this data abnormal")
## [1] "The box plot appears to be abnormal as two outliers are out of the whiskers. which makes this data abnormal"
print("Based on Reports we can use the Wilcoxon-Sign Rank")
## [1] "Based on Reports we can use the Wilcoxon-Sign Rank"
Method 3: Shapiro-Wilk
shapiro.test(Differences)
##
## Shapiro-Wilk normality test
##
## data: Differences
## W = 0.87495, p-value = 0.0008963
print("The value of p < 0.05 (0.0008963), so the data is not normal")
## [1] "The value of p < 0.05 (0.0008963), so the data is not normal"
print("Based on Reports we will use the Wilcoxon Sign Rank")
## [1] "Based on Reports we will use the Wilcoxon Sign Rank"
Interpretation: After conducting all three normality tests, it is
clear we must use a Wilcoxon Sign Rank.
Conduct Inferential Test (Wilcoxon Sign Rank)
wilcox.test(Stress_Pre, Stress_Post, paired = TRUE)
##
## Wilcoxon signed rank exact test
##
## data: Stress_Pre and Stress_Post
## V = 620, p-value = 2.503e-09
## alternative hypothesis: true location shift is not equal to 0
print("As the value of p < 0.05 (p = 0.000000002503), this means the results were SIGNIFICANT.")
## [1] "As the value of p < 0.05 (p = 0.000000002503), this means the results were SIGNIFICANT."
Calculate the Effect Size (Rank Biserial Correlation for
Mann-Whitney U)
df_long <- data.frame(
id = rep(1:length(Stress_Pre), 2),
time = rep(c("Pre", "Post"), each = length(Stress_Pre)),
stress = c(Stress_Pre, Stress_Post)
)
effect_size_result <- wilcox_effsize(df_long, stress ~ time, paired = TRUE)
print(effect_size_result)
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 stress Post Pre 0.844 35 35 large
print("As the size of the effect is (0.84), this means the effect is 'Very Large'.")
## [1] "As the size of the effect is (0.84), this means the effect is 'Very Large'."
Report the Results
cat("There was a significant difference in the stress between Pre-Stress Group (Mdn = 47.24) and Post-Stress (Mdn = 40.84), V = 620, p = .000000002503 The effect size was very large (r₍rb₎ = .84).")
## There was a significant difference in the stress between Pre-Stress Group (Mdn = 47.24) and Post-Stress (Mdn = 40.84), V = 620, p = .000000002503 The effect size was very large (r₍rb₎ = .84).