library(readxl)
library(ggpubr)
## Loading required package: ggplot2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(effectsize)
library(effsize)
# Load the workbook into `jetray`; later steps read its `studyhours` and
# `workstatus` columns.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# where that exact file exists.
jetray <- readxl::read_excel(
  path = "D:/Vedant Work/SLU/Spring Sem (Jan to May 2026)/Applied Analytics/Assignment 6/Assingment 6.2/jetray.xlsx"
)
Step 4: Calculate Descriptive Statistics
# Descriptive statistics of study hours, one row per work-status group.
# Mean, Median and SD skip missing study hours (na.rm = TRUE); N is the number
# of rows in the group, so it also counts rows whose studyhours is NA.
summarise(
  group_by(jetray, workstatus),
  Mean = mean(studyhours, na.rm = TRUE),
  Median = median(studyhours, na.rm = TRUE),
  SD = sd(studyhours, na.rm = TRUE),
  N = n()
)
## # A tibble: 2 × 5
## workstatus Mean Median SD N
## <chr> <dbl> <dbl> <dbl> <int>
## 1 nowork 9.62 8.54 7.45 30
## 2 works 6.41 5.64 4.41 30
Step 5: Create Histograms to check normality
# Histogram of study hours for rows whose workstatus is "works", used as a
# visual normality check. `breaks = 10` is a suggested bin count; hist() may
# pick a nearby "pretty" number of bins.
hist(
  with(jetray, studyhours[workstatus == "works"]),
  breaks = 10,
  col = "lightblue",
  border = "black",
  main = "Histogram of Study Hours (Works)",
  xlab = "Study Hours",
  ylab = "Frequency"
)
# Histogram of study hours for rows whose workstatus is "nowork", used as a
# visual normality check. `breaks = 10` is a suggested bin count; hist() may
# pick a nearby "pretty" number of bins.
hist(
  with(jetray, studyhours[workstatus == "nowork"]),
  breaks = 10,
  col = "lightgreen",
  border = "black",
  main = "Histogram of Study Hours (No Work)",
  xlab = "Study Hours",
  ylab = "Frequency"
)
Step 6: Create Boxplots to check for outliers
# Side-by-side boxplots of study hours per work-status group, coloured by
# group with the "jco" palette. The jittered points overlay the individual
# observations so potential outliers are visible.
ggboxplot(
  data = jetray,
  x = "workstatus",
  y = "studyhours",
  add = "jitter",
  palette = "jco",
  color = "workstatus"
)
Step 7: Conduct the Mann-Whitney U Test (Wilcoxon Rank Sum Test). Since the data may not be normally distributed, we use the Mann-Whitney U test. Note: wilcox.test() performs the Mann-Whitney U test for independent samples.
# Two-sided Wilcoxon rank-sum (Mann-Whitney U) test comparing study hours
# between the work-status groups. The formula stays the first positional
# argument so the formula method is dispatched; `alternative` is spelled out
# at its default value.
wilcox.test(
  studyhours ~ workstatus,
  data = jetray,
  alternative = "two.sided"
)
##
## Wilcoxon rank sum exact test
##
## data: studyhours by workstatus
## W = 569, p-value = 0.07973
## alternative hypothesis: true location shift is not equal to 0