Loading Libraries

library(readxl)
library(ggpubr)
## Loading required package: ggplot2
library(effectsize)
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following objects are masked from 'package:effectsize':
## 
##     cohens_d, eta_squared
## The following object is masked from 'package:stats':
## 
##     filter

Research Question: Does a six-week physical activity program improve student stress levels?

Loading Dataset

DatasetRQ4 <- read_excel("DatasetRQ4.xlsx")

Checking Data and Dataset Structure

head(DatasetRQ4)
## # A tibble: 6 × 3
##   Student_ID Stress_Pre Stress_Post
##        <dbl>      <dbl>       <dbl>
## 1          1       53.5       45.5 
## 2          2       37.4       33.9 
## 3          3       35.8        9.49
## 4          4       89.0       82.8 
## 5          5       30.5       26.8 
## 6          6       42.5       26.9
str(DatasetRQ4)
## tibble [35 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Student_ID : num [1:35] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Stress_Pre : num [1:35] 53.5 37.4 35.8 89 30.5 ...
##  $ Stress_Post: num [1:35] 45.48 33.92 9.49 82.77 26.82 ...

Seperate the Data by Condition

Stress_Pre <- DatasetRQ4$Stress_Pre
Stress_Post <- DatasetRQ4$Stress_Post

Differences <- Stress_Post - Stress_Pre
print(Differences)
##  [1]  -8.044361120  -3.529426078 -26.350753567  -6.276567859  -3.701374633
##  [6] -15.552084134  -4.356073741  -3.647770620  -0.008788193  -5.302543769
## [11]  -8.806996646  -3.200139192  -1.385585292 -19.179181634   3.899627158
## [16]  -6.612429249  -2.533382079  -1.317457905  -0.782853924 -32.441286182
## [21]  -4.132848699  -6.536275900  -6.896524983  -5.238701383 -36.697172771
## [26] -28.957287115  -9.817187881 -10.508880209 -19.386322836 -10.616491219
## [31] -13.700632408  -7.433337532 -20.087719095  -7.326665368 -15.099150128

Calculate descriptive statistics for each group

cat("Pre-Test Mean: ", mean(Stress_Pre, na.rm = TRUE), "\n")
## Pre-Test Mean:  51.53601
cat("Pre-Test Median: ", median(Stress_Pre, na.rm = TRUE), "\n")
## Pre-Test Median:  47.24008
cat("Pre-Test SD: ", sd(Stress_Pre, na.rm = TRUE), "\n\n")
## Pre-Test SD:  17.21906
cat("Post-Test Mean: ", mean(Stress_Post, na.rm = TRUE), "\n")
## Post-Test Mean:  41.4913
cat("Post-Test Median: ", median(Stress_Post, na.rm = TRUE), "\n")
## Post-Test Median:  40.84836
cat("Post-Test SD: ", sd(Stress_Post, na.rm = TRUE), "\n")
## Post-Test SD:  18.88901

Check normality

Method 1: Histograms

hist(Differences,
     main = "Histogram of Difference Scores (Post - Pre)",
     xlab = "Change in Stress Levels",
     ylab = "Frequency",
     col = "purple",
     border = "darkblue",
     breaks = 15)

cat("Skewness: Negatively skewed ,", "Kurtosis: Bell Curve")
## Skewness: Negatively skewed , Kurtosis: Bell Curve
print("As the data is negatively skewed also the shape is somehow resembling a bell curve")
## [1] "As the data is negatively skewed also the shape is somehow resembling a bell curve"
print("Based on Reports we cannot use the Dependent T-test")
## [1] "Based on Reports we cannot use the Dependent T-test"

Method 2: Boxplots

boxplot(Differences,
        main = "Distribution of Score Differences (Post Stress - Pre Stress)",
        ylab = "Change in Stress Levels",
        col = "lightblue",
        border = "darkblue",
        horizontal = FALSE)

print("The box plot appears to be abnormal as two outliers are out of the whiskers. which makes this data abnormal")
## [1] "The box plot appears to be abnormal as two outliers are out of the whiskers. which makes this data abnormal"
print("Based on Reports we can use the Wilcoxon-Sign Rank")
## [1] "Based on Reports we can use the Wilcoxon-Sign Rank"

Method 3: Shapiro-Wilk

shapiro.test(Differences)
## 
##  Shapiro-Wilk normality test
## 
## data:  Differences
## W = 0.87495, p-value = 0.0008963
print("The value of p < 0.05 (0.0008963), so the data is not normal")
## [1] "The value of p < 0.05 (0.0008963), so the data is not normal"
print("Based on Reports we will use the Wilcoxon Sign Rank")
## [1] "Based on Reports we will use the Wilcoxon Sign Rank"

Interpretation: After conducting all three normality tests, it is clear we must use a Wilcoxon Sign Rank.

Conduct Inferential Test (Wilcoxon Sign Rank)

wilcox.test(Stress_Pre, Stress_Post, paired = TRUE)
## 
##  Wilcoxon signed rank exact test
## 
## data:  Stress_Pre and Stress_Post
## V = 620, p-value = 2.503e-09
## alternative hypothesis: true location shift is not equal to 0
print("As the value of p < 0.05 (p = 0.000000002503), this means the results were SIGNIFICANT.")
## [1] "As the value of p < 0.05 (p = 0.000000002503), this means the results were SIGNIFICANT."

Calculate the Effect Size (Rank Biserial Correlation for Mann-Whitney U)

df_long <- data.frame(
  id = rep(1:length(Stress_Pre), 2),
  time = rep(c("Pre", "Post"), each = length(Stress_Pre)),
  stress = c(Stress_Pre, Stress_Post)
)
  
effect_size_result <- wilcox_effsize(df_long, stress ~ time, paired = TRUE)
print(effect_size_result)
## # A tibble: 1 × 7
##   .y.    group1 group2 effsize    n1    n2 magnitude
## * <chr>  <chr>  <chr>    <dbl> <int> <int> <ord>    
## 1 stress Post   Pre      0.844    35    35 large
print("As the size of the effect is (0.84), this means the effect is 'Very Large'.")
## [1] "As the size of the effect is (0.84), this means the effect is 'Very Large'."

Report the Results

cat("There was a significant difference in the stress between Pre-Stress Group (Mdn = 47.24) and Post-Stress (Mdn = 40.84), V = 620, p = .000000002503 The effect size was very large (r₍rb₎ = .84).")
## There was a significant difference in the stress between Pre-Stress Group (Mdn = 47.24) and Post-Stress (Mdn = 40.84), V = 620, p = .000000002503 The effect size was very large (r₍rb₎ = .84).