library(readxl)
library(ggpubr)
## Loading required package: ggplot2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(effectsize)
library(effsize)
# Import the workbook into `Dataset6.2`; every later chunk reads from
# this object.
Dataset6.2 <- read_excel(
  path = "C:/Users/joyce/Downloads/Dataset6.2-2.xlsx"
)
# Descriptive statistics for Study_Hours within each Work_Status group.
# Missing Study_Hours values are excluded from every statistic.
Dataset6.2 %>%
  group_by(Work_Status) %>%
  summarise(
    Mean = mean(Study_Hours, na.rm = TRUE),
    Median = median(Study_Hours, na.rm = TRUE),
    SD = sd(Study_Hours, na.rm = TRUE),
    # Count only non-missing values so N is the number of observations the
    # NA-removed statistics above are actually based on (n() would also
    # count rows whose Study_Hours is NA).
    N = sum(!is.na(Study_Hours))
  )
## # A tibble: 2 × 5
##   Work_Status    Mean Median    SD     N
##   <chr>         <dbl>  <dbl> <dbl> <int>
## 1 Does_Not_Work  9.62   8.54  7.45    30
## 2 Works          6.41   5.64  4.41    30

This code calculates the descriptive statistics (mean, median, standard deviation, and sample size) of study hours for each work-status group.

# Histogram of Study_Hours for the rows whose Work_Status is "Works".
with(
  Dataset6.2,
  hist(
    Study_Hours[Work_Status == "Works"],
    breaks = 10,
    col = "lightblue",
    border = "black",
    main = "Histogram of Works Study Hours",
    xlab = "Study Hours",
    ylab = "Frequency"
  )
)

# Histogram of Study_Hours for the rows whose Work_Status is
# "Does_Not_Work".
with(
  Dataset6.2,
  hist(
    Study_Hours[Work_Status == "Does_Not_Work"],
    breaks = 10,
    col = "lightgreen",
    border = "black",
    main = "Histogram of Non Working Study Hours",
    xlab = "Study Hours",
    ylab = "Frequency"
  )
)

For the working students' histogram, the data appear slightly positively skewed. In terms of kurtosis, the distribution does not appear too peaked or too flat, but it is not perfectly bell-shaped.

For the non-working students' histogram, the data appear positively skewed. In terms of kurtosis, the distribution does not appear bell-shaped.

We may need to examine normality further before deciding which test to use.

# Boxplots of Study_Hours, one box per Work_Status group, coloured by
# group, with the individual observations overlaid as jittered points.
ggboxplot(
  data = Dataset6.2,
  x = "Work_Status",
  y = "Study_Hours",
  add = "jitter",
  color = "Work_Status",
  palette = "jco"
)

The Works boxplot appears approximately normal, although there are some extreme points (dots) beyond the whiskers.

The Does_Not_Work boxplot does not appear normal. There are several points (dots) beyond the whiskers.

We may need to use a Mann-Whitney U test.

# Shapiro-Wilk normality test on Study_Hours for the "Works" group.
# The printed "data:" label echoes this exact subsetting expression.
shapiro.test(Dataset6.2$Study_Hours[Dataset6.2$Work_Status == "Works"])
## 
##  Shapiro-Wilk normality test
## 
## data:  Dataset6.2$Study_Hours[Dataset6.2$Work_Status == "Works"]
## W = 0.94582, p-value = 0.1305
# Shapiro-Wilk normality test on Study_Hours for the "Does_Not_Work" group.
# The printed "data:" label echoes this exact subsetting expression.
shapiro.test(Dataset6.2$Study_Hours[Dataset6.2$Work_Status == "Does_Not_Work"])
## 
##  Shapiro-Wilk normality test
## 
## data:  Dataset6.2$Study_Hours[Dataset6.2$Work_Status == "Does_Not_Work"]
## W = 0.83909, p-value = 0.0003695

The data for students who work were normally distributed (W = 0.95, p = .131, so p > .05). The data for students who do not work were not normally distributed (W = 0.84, p < .001, so p < .05).

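After conducting all three normality checks (histograms, boxplots, and Shapiro-Wilk tests), it is clear we must use a Mann-Whitney U test, because the data for students who do not work are not normally distributed.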

# Mann-Whitney U (Wilcoxon rank-sum) test comparing Study_Hours between
# the two Work_Status groups; the two-sided alternative is the default,
# written out here for clarity.
wilcox.test(
  Study_Hours ~ Work_Status,
  data = Dataset6.2,
  alternative = "two.sided"
)

Because p > .05 (greater than .05), the results were NOT significant.

Students who do not work (Mdn = 8.54) did not differ significantly in study hours from students who work (Mdn = 5.64), U = 569, p = .080. Because the result was not significant, no effect size is reported.