#Step 2: Open the Installed Packages

library(readxl)
library(ggpubr)
## Loading required package: ggplot2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(effectsize)
library(effsize)

#Step 3: Import and Name Dataset

# Load the Excel workbook into `Dataset6.1`. Later steps read the columns
# `Group` and `Exam_Score` from this object.
# NOTE(review): the path is absolute and user-specific, so the script only runs
# on a machine with this exact folder layout -- consider a project-relative path.
Dataset6.1 <- read_excel("C:/Users/cniti/Documents/AA-5221 Applied Analytics/Assignment 6/Dataset6.1.xlsx")

#Step 4: Calculate Descriptive Statistics for Each Group

# Descriptive statistics of Exam_Score for each level of Group:
# mean, median, standard deviation and number of rows per group.
#
# The grouping variable must be Group. Grouping on Exam_Score itself places
# every distinct score in its own one-row group, which yields Mean == Median ==
# the score, SD = NA (sd() of a single value) and N = 1 for every row.
Dataset6.1 %>%
  group_by(Group) %>%
  summarise(
    Mean = mean(Exam_Score, na.rm = TRUE),
    Median = median(Exam_Score, na.rm = TRUE),
    SD = sd(Exam_Score, na.rm = TRUE),
    N = n()  # counts all rows in the group, including any with a missing score
  )
## (Re-run this chunk to record its output: one row per level of Group with
## the columns Group, Mean, Median, SD and N.)

#Step 5: Create Histograms for Each Group

# Histogram of exam scores for the "Tutoring" group.
with(
  Dataset6.1,
  hist(
    Exam_Score[Group == "Tutoring"],
    breaks = 10,
    col = "lightblue",
    border = "black",
    main = "Histogram of Tutoring Scores",
    xlab = "Value",
    ylab = "Frequency"
  )
)

# Histogram of exam scores for the "No Tutoring" group.
with(
  Dataset6.1,
  hist(
    Exam_Score[Group == "No Tutoring"],
    breaks = 10,
    col = "lightgreen",
    border = "black",
    main = "Histogram of No Tutoring Scores",
    xlab = "Value",
    ylab = "Frequency"
  )
)

#Step 6: Create Boxplots for Each Group

# Boxplots of Exam_Score by Group, coloured per group with the "jco" palette,
# with the individual observations overlaid as jittered points.
ggboxplot(
  data = Dataset6.1,
  x = "Group",
  y = "Exam_Score",
  add = "jitter",
  color = "Group",
  palette = "jco"
)

#Step 7: Shapiro-Wilk Test of Normality

# Shapiro-Wilk normality test on the exam scores of each group separately.
# In the recorded output below both p-values are well above .05 (0.953 and
# 0.9398), so normality is not rejected for either group.

# Tutoring group
shapiro.test(Dataset6.1$Exam_Score[Dataset6.1$Group == "Tutoring"])
## 
##  Shapiro-Wilk normality test
## 
## data:  Dataset6.1$Exam_Score[Dataset6.1$Group == "Tutoring"]
## W = 0.98859, p-value = 0.953

# No Tutoring group
shapiro.test(Dataset6.1$Exam_Score[Dataset6.1$Group == "No Tutoring"])
## 
##  Shapiro-Wilk normality test
## 
## data:  Dataset6.1$Exam_Score[Dataset6.1$Group == "No Tutoring"]
## W = 0.98791, p-value = 0.9398

#Step 7: Conduct Inferential Test

# Independent-samples t-test comparing Exam_Score between the two levels of
# Group. var.equal = TRUE requests the pooled-variance (Student) form, which
# the output labels "Two Sample t-test" with df = 78.
# The difference is taken as No Tutoring minus Tutoring (see the alternative
# hypothesis line in the output), so a negative t means the Tutoring mean is
# the higher one.
# NOTE(review): var.equal = TRUE assumes equal group variances; no
# homogeneity-of-variance check is visible in this script -- confirm.
t.test(Exam_Score ~ Group, data = Dataset6.1, var.equal = TRUE)
## 
##  Two Sample t-test
## 
## data:  Exam_Score by Group
## t = -3.8593, df = 78, p-value = 0.000233
## alternative hypothesis: true difference in means between group No Tutoring and group Tutoring is not equal to 0
## 95 percent confidence interval:
##  -9.724543 -3.105845
## sample estimates:
## mean in group No Tutoring    mean in group Tutoring 
##                  71.94627                  78.36147
# Wilcoxon rank-sum test on the same comparison (Exam_Score by Group), the
# rank-based counterpart of the two-sample t-test. The output reports the exact
# test with a two-sided alternative (location shift not equal to 0).
wilcox.test(Exam_Score ~ Group, data = Dataset6.1)
## 
##  Wilcoxon rank sum exact test
## 
## data:  Exam_Score by Group
## W = 419, p-value = 0.0001833
## alternative hypothesis: true location shift is not equal to 0

#Step 7: Calculate the Effect Size

# Cohen's d for the difference in Exam_Score between the two groups, using the
# pooled standard deviation (pooled_sd = TRUE). The result is stored in
# `cohens_d_result` and then printed together with its 95% CI.
# The estimate is negative, presumably because the difference is taken as
# No Tutoring minus Tutoring, as in the t-test -- verify the level order.
cohens_d_result <- cohens_d(Exam_Score ~ Group, data = Dataset6.1, pooled_sd = TRUE)
print(cohens_d_result)
## Cohen's d |         95% CI
## --------------------------
## -0.86     | [-1.32, -0.40]
## 
## - Estimated using pooled SD.
# Cliff's delta for Exam_Score by Group: a rank-based effect size that
# accompanies the Wilcoxon test. The output gives the estimate, its magnitude
# label ("large") and a 95% confidence interval.
cliff.delta(Exam_Score ~ Group, data = Dataset6.1)
## 
## Cliff's Delta
## 
## delta estimate: -0.47625 (large)
## 95 percent confidence interval:
##      lower      upper 
## -0.6640312 -0.2319561

#Step 8: Report the Results

# Students who participated in tutoring (M = 78.36, SD = 7.68) scored
# significantly higher than students who did not participate in tutoring
# (M = 71.95, SD = 7.18), t(78) = −3.86, p < .001. The effect size was large
# (Cohen’s d = 0.86).