## Step 1: Install the Required Packages

#Install the packages you need to conduct a Wilcoxon Signed-Rank test. Packages only need to be installed once. Do **NOT** run the install code more than once after successful installation.


#install.packages("readxl")
#install.packages("ggpubr")
#install.packages("effectsize")
#install.packages("rstatix")


## Step 2: Open the Installed Packages

#Open the packages you have installed into R. You must re-open your packages every time you start a new RStudio session.


library(readxl)
library(ggpubr)
## Loading required package: ggplot2
library(effectsize)
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following objects are masked from 'package:effectsize':
## 
##     cohens_d, eta_squared
## The following object is masked from 'package:stats':
## 
##     filter
## Step 3: Import and Name Dataset

#Import your Excel dataset into RStudio.


# Read the Excel workbook into Dataset6.4. The absolute path below is specific
# to the original author's machine and must exist for the script to run.
Dataset6.4 <- readxl::read_excel(
  path = "/Users/ha113ab/Desktop/datasets/Dataset6.4.xlsx"
)


## Step 4: Separate the Data by Condition

#Create objects for the pre-test scores, post-test scores, and the difference between them.

# Pull the two paired measurements out of the dataset as plain vectors.
Before <- Dataset6.4[["Stress_Pre"]]
After <- Dataset6.4[["Stress_Post"]]

# Per-participant change, computed as post-test minus pre-test.
Differences <- After - Before


## Step 5: Calculate Descriptive Statistics for Each Group

#Calculate the descriptive statistics. For a Wilcoxon Signed-Rank test, we report the median.


# Pre-test stress: mean, median, and standard deviation (missing values dropped).
with(Dataset6.4, mean(Stress_Pre, na.rm = TRUE))
## [1] 51.53601
with(Dataset6.4, median(Stress_Pre, na.rm = TRUE))
## [1] 47.24008
with(Dataset6.4, sd(Stress_Pre, na.rm = TRUE))
## [1] 17.21906
# Post-test stress: the same three summaries.
with(Dataset6.4, mean(Stress_Post, na.rm = TRUE))
## [1] 41.4913
with(Dataset6.4, median(Stress_Post, na.rm = TRUE))
## [1] 40.84836
with(Dataset6.4, sd(Stress_Post, na.rm = TRUE))
## [1] 18.88901
#**Descriptive Statistics Output:**
#- **Pre-test:** Mean = 51.54, Median = 47.24, SD = 17.22
#- **Post-test:** Mean = 41.49, Median = 40.85, SD = 18.89

## Step 6: Create Histograms for Each Group

#Create histograms to visualize the distribution of scores for each condition.


# Pre-test histogram
# Draw the distribution of pre-test stress scores, requesting about 20 bins.
hist(
  x = Dataset6.4[["Stress_Pre"]],
  breaks = 20,
  col = "blue",
  border = "black",
  main = "Histogram of Stress_Pre Scores",
  xlab = "Value",
  ylab = "Frequency"
)

# The Stress_Pre histogram appears symmetrical and bell-shaped, suggesting normal distribution.


# Post-test histogram
# Draw the distribution of post-test stress scores, requesting about 20 bins.
hist(
  x = Dataset6.4[["Stress_Post"]],
  breaks = 20,
  col = "blue",
  border = "black",
  main = "Histogram of Stress_Post Scores",
  xlab = "Value",
  ylab = "Frequency"
)

#The Stress_Post histogram appears symmetrical and bell-shaped, suggesting normal distribution.

## Step 7: Create Boxplots for Each Group

#Create boxplots to check for outliers in each condition.


# Pre-test boxplot
# Boxplot of pre-test stress scores; points beyond the whiskers flag outliers.
boxplot(
  x = Dataset6.4[["Stress_Pre"]],
  col = "green",
  border = "black",
  main = "Distribution of Stress_Pre Scores",
  ylab = "Stress Score"
)

#The Stress_Pre boxplot shows two dots outside of the boxplot, indicating the presence of outliers. This suggests the data may not be normally distributed.


# Post-test boxplot

# Boxplot of post-test stress scores; points beyond the whiskers flag outliers.
boxplot(
  x = Dataset6.4[["Stress_Post"]],
  col = "green",
  border = "black",
  main = "Distribution of Stress_Post Scores",
  ylab = "Stress Score"
)

#The Stress_Post boxplot has no dots outside the whiskers, indicating no outliers and suggesting normal distribution.

## Step 8: Check Normality of Difference Scores

#For a dependent t-test, we need to check the normality of the difference scores.


# Boxplot of differences
# Boxplot of the paired difference scores (post-test minus pre-test).
boxplot(
  x = Differences,
  col = "green",
  border = "black",
  main = "Distribution of Difference Scores (Post - Pre)",
  ylab = "Difference in Scores"
)

#The difference scores show outliers, indicating the data is NOT normal.

## Step 9: Conduct Wilcoxon Signed-Rank Test

#Since the data is not normal, we use the Wilcoxon signed-rank test instead of the paired t-test.


# Paired Wilcoxon signed-rank test comparing pre-test and post-test stress
# scores from Dataset6.4. The call must be live and must reference Dataset6.4
# (the dataset imported in Step 3); a commented-out call against Dataset6.3
# never runs the test on the data analysed in this script.
wilcox.test(Dataset6.4$Stress_Pre, Dataset6.4$Stress_Post, paired = TRUE)


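#**Wilcoxon signed rank exact test result:**
#- **V = 518**
#- **p-value = 0.0005508**
#NOTE: These figures are inconsistent with the Dataset6.4 effect size printed in Step 10 (0.844, n = 35). With 35 pairs, V = 518 corresponds to an effect size of about 0.56, so these values appear to come from a different dataset. Replace V and the p-value with the output of wilcox.test() run on Dataset6.4.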

#The p-value is less than 0.05, indicating a **SIGNIFICANT** difference between pre and post stress levels. We will calculate the effect size.

## Step 10: Calculate Effect Size

#Calculate the effect size for the Wilcoxon test. Note that rstatix::wilcox_effsize() reports r = Z / sqrt(N), which is not the same statistic as the rank-biserial correlation.


# Stack the paired scores into long format: one row per participant per time
# point, with all "Pre" rows first and all "Post" rows second, each in the
# original participant order. seq_len() is used instead of 1:length() so an
# empty dataset yields an empty id vector rather than c(1, 0).
df_long <- data.frame(
  id = rep(seq_len(nrow(Dataset6.4)), times = 2),
  time = rep(c("Pre", "Post"), each = nrow(Dataset6.4)),
  score = c(Dataset6.4$Stress_Pre, Dataset6.4$Stress_Post)
)

# Paired Wilcoxon effect size for score by time.
# NOTE(review): pairing presumably relies on rows being in the same participant
# order within each time level, which the construction above guarantees.
wilcox_effsize(df_long, score ~ time, paired = TRUE)
## # A tibble: 1 × 7
##   .y.   group1 group2 effsize    n1    n2 magnitude
## * <chr> <chr>  <chr>    <dbl> <int> <int> <ord>    
## 1 score Post   Pre      0.844    35    35 large
#**Effect Size Result:** **0.844**

#**Guide:**
#- 0.00 to 0.09 = small
#- 0.10 to 0.29 = moderate
#- 0.30 to 0.49 = large
#- 0.50 to 1.00 = very large

#The effect size of 0.844 is considered **VERY LARGE**.

## Step 11: Report the Results

#**Final Report:**

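#There was a significant difference in stress levels between Pre-test (Mdn = 47.24) and Post-test (Mdn = 40.85), V = 518, p < .001. The effect size was very large (r = 0.84).
#NOTE: The medians and effect size above are taken from the Dataset6.4 output in Steps 5 and 10. V = 518 is carried over from the Step 9 text and is inconsistent with that effect size; replace it with the V reported by wilcox.test() on Dataset6.4.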