library(readxl)
library(ggpubr)
## Loading required package: ggplot2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(effectsize)
library(effsize)
Dataset6.2 <- read_excel("/Users/srikarthikeya/Downloads/Dataset6.2.xlsx")
Dataset6.2 %>%
  group_by(Work_Status) %>%
  summarise(
    Mean = mean(Study_Hours, na.rm = TRUE),
    Median = median(Study_Hours, na.rm = TRUE),
    SD = sd(Study_Hours, na.rm = TRUE),
    N = n()
  )
## # A tibble: 2 × 5
##   Work_Status    Mean Median    SD     N
##   <chr>         <dbl>  <dbl> <dbl> <int>
## 1 Does_Not_Work  9.62   8.54  7.45    30
## 2 Works          6.41   5.64  4.41    30
hist(Dataset6.2$Study_Hours[Dataset6.2$Work_Status == "Does_Not_Work"],
     main = "Histogram of Does_Not_Work",
     ylab = "Frequency",
     col = "lightblue",
     border = "black",
     breaks = 10)

hist(Dataset6.2$Study_Hours[Dataset6.2$Work_Status == "Works"],
     main = "Histogram of Works",
     xlab = "Value",
     ylab = "Frequency",
     col = "lightgreen",
     border = "black",
     breaks = 10)

For the Does_Not_Work histogram, The Graph appears to be tall and kind of postively skewed. The data is abnormal. For the Works histogram, The Graph appears to be postively skewed. The data is abnormal.

ggboxplot(Dataset6.2, x = "Work_Status", y = "Study_Hours",
          color = "Work_Status",
          palette = "jco",
          add = "jitter")

The Works boxplot appears normal. There are no dots past the whiskers. The Does_Not_Work boxplot appears abnormal. There are 2 dots past the up side whiskers. Although some are very close to the whiskers, some are arguably far away . We may need to use a Mann-Whitney U test.

shapiro.test(Dataset6.2$Study_Hours[Dataset6.2$Work_Status == "Works"])
## 
##  Shapiro-Wilk normality test
## 
## data:  Dataset6.2$Study_Hours[Dataset6.2$Work_Status == "Works"]
## W = 0.94582, p-value = 0.1305
shapiro.test(Dataset6.2$Study_Hours[Dataset6.2$Work_Status == "Does_Not_Work"])
## 
##  Shapiro-Wilk normality test
## 
## data:  Dataset6.2$Study_Hours[Dataset6.2$Work_Status == "Does_Not_Work"]
## W = 0.83909, p-value = 0.0003695

The data for Works was normal p= 0.1305 (p > .05). The data for Does_Not_Work was not normal p= 0.0003695 (p < .05). After conducting all three normality tests, it is clear we must use a Mann-Whitney U test.

wilcox.test(Study_Hours ~ Work_Status, data = Dataset6.2)
## 
##  Wilcoxon rank sum exact test
## 
## data:  Study_Hours by Work_Status
## W = 569, p-value = 0.07973
## alternative hypothesis: true location shift is not equal to 0

p > .05 (0.0797), this means the results were NOT significant.

Since they were not significant we dont have to calculate the effect size.

Does_Not_Work ((Mdn = 8.54) was not significantly different from Works (Mdn = 5.64), U = 569, p = 0.0797.