#Open the Installed Packages

library(readxl)
library(ggpubr)
## Loading required package: ggplot2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(effectsize)
library(effsize)

#Import the Dataset

# Location of the Excel workbook that holds the exam scores.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs on a machine where this exact file exists.
dataset_path <- "/Users/atharvapitke/Documents/Analytics/Assignment6/Dataset6.1.xlsx"

# Read the workbook into the data frame used by the rest of the analysis.
Dataset6.1 <- read_excel(path = dataset_path)

#Calculate Descriptive Statistics for Each Group

# Per-group descriptive statistics for Exam_Score.
# Mean, Median and SD all drop missing scores (na.rm = TRUE), so N counts only
# the non-missing scores those statistics are actually based on, rather than
# the raw number of rows in each group.
Dataset6.1 %>%
  group_by(Group) %>%
  summarise(
    Mean = mean(Exam_Score, na.rm = TRUE),
    Median = median(Exam_Score, na.rm = TRUE),
    SD = sd(Exam_Score, na.rm = TRUE),
    N = sum(!is.na(Exam_Score))
  )
## # A tibble: 2 × 5
##   Group        Mean Median    SD     N
##   <chr>       <dbl>  <dbl> <dbl> <int>
## 1 No Tutoring  71.9   71.5  7.68    40
## 2 Tutoring     78.4   78.7  7.18    40

#Create Histograms for Each Group

# Draw the Exam_Score histogram for a single group of Dataset6.1.
#   group_label: value of the Group column that selects the rows to plot;
#                it is also inserted into the plot title.
#   fill_colour: bar fill colour handed to hist().
plot_group_histogram <- function(group_label, fill_colour) {
  in_group <- Dataset6.1$Group == group_label
  hist(Dataset6.1$Exam_Score[in_group],
       main = paste("Histogram of", group_label, "Exam Scores"),
       xlab = "Value",
       ylab = "Frequency",
       col = fill_colour,
       border = "black",
       breaks = 10)
}

# One histogram per group, in the same order and colours as before.
plot_group_histogram("Tutoring", "lightblue")

plot_group_histogram("No Tutoring", "lightgreen")

For the Tutoring histogram, the data appear negatively skewed. It is difficult to judge the exact kurtosis from the plot, but it does not look normal. For the No Tutoring histogram, the data appear symmetrical (normal), and the kurtosis also looks normal (bell-shaped). We may need to use an independent-samples t-test.

#Create Boxplots for Each Group

# Boxplots of Exam_Score, one box per Group, coloured by Group with the "jco"
# palette; add = "jitter" overlays the data points with jitter.
exam_score_boxplot <- ggboxplot(
  data = Dataset6.1,
  x = "Group",
  y = "Exam_Score",
  add = "jitter",
  color = "Group",
  palette = "jco"
)
print(exam_score_boxplot)

The Tutoring boxplot appears abnormal: there are several dots past the whiskers, some close to them and some far away. The No Tutoring boxplot also appears abnormal, again with several dots past the whiskers, some close and some far away. We may need to use an independent-samples t-test.

#Shapiro-Wilk Test of Normality

# Shapiro-Wilk normality test on the Exam_Score values of the "Tutoring" group.
# The argument is deliberately left as one inline expression: shapiro.test()
# echoes it verbatim on the "data:" line of its printed output.
shapiro.test(Dataset6.1$Exam_Score[Dataset6.1$Group == "Tutoring"])
## 
##  Shapiro-Wilk normality test
## 
## data:  Dataset6.1$Exam_Score[Dataset6.1$Group == "Tutoring"]
## W = 0.98859, p-value = 0.953
# Shapiro-Wilk normality test on the Exam_Score values of the "No Tutoring"
# group. The argument is deliberately left as one inline expression:
# shapiro.test() echoes it verbatim on the "data:" line of its printed output.
shapiro.test(Dataset6.1$Exam_Score[Dataset6.1$Group == "No Tutoring"])
## 
##  Shapiro-Wilk normality test
## 
## data:  Dataset6.1$Exam_Score[Dataset6.1$Group == "No Tutoring"]
## W = 0.98791, p-value = 0.9398

The data for the Tutoring group were normal (p = .953, p > .05). The data for the No Tutoring group were also normal (p = .940, p > .05). Because both groups passed the Shapiro-Wilk normality test, it is clear we must use an independent-samples t-test.

#Conduct Inferential Test

# Two-sample t-test comparing Exam_Score between the levels of Group, with
# equal variances assumed (var.equal = TRUE). The result is stored and then
# printed explicitly.
t_test_result <- t.test(
  Exam_Score ~ Group,
  data = Dataset6.1,
  var.equal = TRUE
)
print(t_test_result)
## 
##  Two Sample t-test
## 
## data:  Exam_Score by Group
## t = -3.8593, df = 78, p-value = 0.000233
## alternative hypothesis: true difference in means between group No Tutoring and group Tutoring is not equal to 0
## 95 percent confidence interval:
##  -9.724543 -3.105845
## sample estimates:
## mean in group No Tutoring    mean in group Tutoring 
##                  71.94627                  78.36147

Because p < .05, the result is statistically significant, so we continue to the effect size.

#Calculate the Effect Size #Cohen’s D for Independent T-Test

# Cohen's d for Exam_Score between the two groups, using the pooled standard
# deviation. The call is namespace-qualified because both effectsize and
# effsize are attached in this script.
cohens_d_result <- effectsize::cohens_d(
  Exam_Score ~ Group,
  data = Dataset6.1,
  pooled_sd = TRUE
)
print(cohens_d_result)
## Cohen's d |         95% CI
## --------------------------
## -0.86     | [-1.32, -0.40]
## 
## - Estimated using pooled SD.

Cohen’s d for Dataset6.1, which expresses the difference between the group averages, is -0.86. Its magnitude falls in the ±0.80 to ±1.29 range, which indicates a large effect.

#Results

Students in the Tutoring group (M = 78.4, SD = 7.18) scored significantly higher on the exam than students in the No Tutoring group (M = 71.9, SD = 7.68), t(78) = −3.86, p < .001. The effect size was large (Cohen’s d = −0.86).