I used the data from https://www.lock5stat.com/datapage3e.html, specifically the “SleepStudy” dataset.
I proposed the following 10 questions, partly based on questions that were given to me in the assignment and partly based on my own understanding of the data.
Is there a significant difference in the average GPA between male and female college students?
Is there a significant difference in the number of early classes between the first two class years and other class years?
Do students who identify as “Larks” have significantly better cognitive skills than “Owls”?
Is there a significant difference in the number of classes missed between students with and without early classes?
Is there a significant difference in happiness levels between students with moderate and normal depression?
Is there a significant difference in sleep quality between students who pulled an all-nighter and those who didn’t?
Do students who abstain from alcohol use have better stress scores than heavy drinkers?
Is there a significant difference in average drinks per week between male and female students?
Do students with higher weekday sleep hours have significantly different GPA compared to those with lower weekday sleep hours?
Do students in their first two years of college get significantly more sleep on weekends compared to students in later years?
We will explore the following questions in detail.
# Q1: GPA by gender.
# Gender is a 0/1 code (0 = Female, 1 = Male, per the axis label), so it is
# wrapped in factor() to get one box per group instead of a continuous axis.
ggplot(
  sleep_data_clean,
  aes(x = factor(Gender), y = GPA, fill = factor(Gender))
) +
  geom_boxplot() +
  labs(
    title = "GPA by Gender",
    x = "Gender (0=Female, 1=Male)",
    y = "GPA",
    # Without an explicit fill label the legend would be titled with the raw
    # expression "factor(Gender)".
    fill = "Gender"
  ) +
  theme_minimal()

# Welch two-sample t-test, two-sided (the t.test() defaults).
t.test(GPA ~ Gender, data = sleep_data_clean)
##
## Welch Two Sample t-test
##
## data: GPA by Gender
## t = 3.9139, df = 200.9, p-value = 0.0001243
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 0.09982254 0.30252780
## sample estimates:
## mean in group 0 mean in group 1
## 3.324901 3.123725
# Q2: number of early classes, first two class years vs. all other years.
# Collapse ClassYear into a two-level label: years 1 and 2 vs. everything else.
sleep_data_clean <- mutate(
  sleep_data_clean,
  ClassYearGroup = ifelse(
    ClassYear %in% c(1, 2),
    "First Two Years",
    "Other Years"
  )
)

# One box per class-year group.
sleep_data_clean %>%
  ggplot(aes(ClassYearGroup, NumEarlyClass, fill = ClassYearGroup)) +
  geom_boxplot() +
  ggtitle("Number of Early Classes by Class Year Group") +
  xlab("Class Year Group") +
  ylab("Number of Early Classes") +
  theme_minimal()

# Welch two-sample t-test, two-sided (the t.test() defaults).
sleep_data_clean %>%
  t.test(NumEarlyClass ~ ClassYearGroup, data = .)
##
## Welch Two Sample t-test
##
## data: NumEarlyClass by ClassYearGroup
## t = 4.1813, df = 250.69, p-value = 4.009e-05
## alternative hypothesis: true difference in means between group First Two Years and group Other Years is not equal to 0
## 95 percent confidence interval:
## 0.4042016 1.1240309
## sample estimates:
## mean in group First Two Years mean in group Other Years
## 2.070423 1.306306
# Q3: cognition z-score, "Lark" vs. "Owl" chronotypes.
# Keep only rows labelled "Lark" or "Owl"; rows with any other LarkOwl value
# are dropped so the comparison has exactly two groups.
sleep_data_filtered <- filter(
  sleep_data_clean,
  LarkOwl %in% c("Lark", "Owl")
)

# One box per chronotype.
sleep_data_filtered %>%
  ggplot(aes(LarkOwl, CognitionZscore, fill = LarkOwl)) +
  geom_boxplot() +
  ggtitle("Cognitive Skills by Chronotype") +
  xlab("Chronotype (Lark/Owl)") +
  ylab("Cognition Z-Score") +
  theme_minimal()

# Welch two-sample t-test, two-sided (the t.test() defaults).
sleep_data_filtered %>%
  t.test(CognitionZscore ~ LarkOwl, data = .)
##
## Welch Two Sample t-test
##
## data: CognitionZscore by LarkOwl
## t = 0.80571, df = 75.331, p-value = 0.4229
## alternative hypothesis: true difference in means between group Lark and group Owl is not equal to 0
## 95 percent confidence interval:
## -0.1893561 0.4465786
## sample estimates:
## mean in group Lark mean in group Owl
## 0.09024390 -0.03836735
# Q4: classes missed, students with vs. without an early class.
# EarlyClass is a 0/1 indicator (0 = No, 1 = Yes, per the axis label);
# factor() turns it into a discrete grouping for the boxplot.
ggplot(
  sleep_data_clean,
  aes(x = factor(EarlyClass), y = ClassesMissed, fill = factor(EarlyClass))
) +
  geom_boxplot() +
  labs(
    title = "Classes Missed by Early Class Indicator",
    x = "Early Class (0=No, 1=Yes)",
    y = "Number of Classes Missed",
    # Without an explicit fill label the legend would be titled with the raw
    # expression "factor(EarlyClass)".
    fill = "Early Class"
  ) +
  theme_minimal()

# Welch two-sample t-test, two-sided (the t.test() defaults).
t.test(ClassesMissed ~ factor(EarlyClass), data = sleep_data_clean)
##
## Welch Two Sample t-test
##
## data: ClassesMissed by factor(EarlyClass)
## t = 1.4755, df = 152.78, p-value = 0.1421
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -0.2233558 1.5412830
## sample estimates:
## mean in group 0 mean in group 1
## 2.647059 1.988095
# Q5: happiness, "moderate" vs. "normal" depression status.
# Restrict to the two depression categories being compared; rows with any
# other DepressionStatus value are dropped.
depression_data <- filter(
  sleep_data_clean,
  DepressionStatus %in% c("normal", "moderate")
)

# Bars show the group mean of Happiness; error bars come from mean_cl_normal.
depression_data %>%
  ggplot(aes(DepressionStatus, Happiness, fill = DepressionStatus)) +
  stat_summary(geom = "bar", fun = mean, width = 0.7, position = "dodge") +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_cl_normal,
    width = 0.2,
    position = position_dodge(width = 0.7)
  ) +
  ggtitle("Average Happiness by Depression Status") +
  xlab("Depression Status") +
  ylab("Average Happiness") +
  theme_minimal()

# Welch two-sample t-test, two-sided (the t.test() defaults).
depression_data %>%
  t.test(Happiness ~ DepressionStatus, data = .)
##
## Welch Two Sample t-test
##
## data: Happiness by DepressionStatus
## t = -4.3253, df = 43.992, p-value = 8.616e-05
## alternative hypothesis: true difference in means between group moderate and group normal is not equal to 0
## 95 percent confidence interval:
## -5.818614 -2.119748
## sample estimates:
## mean in group moderate mean in group normal
## 23.08824 27.05742
# Q6: poor-sleep-quality score, students who pulled an all-nighter vs. not.
# AllNighter is a 0/1 indicator (0 = No, 1 = Yes, per the axis label).
# Bars show the group mean of PoorSleepQuality; error bars come from
# mean_cl_normal.
ggplot(
  sleep_data_clean,
  aes(x = factor(AllNighter), y = PoorSleepQuality, fill = factor(AllNighter))
) +
  stat_summary(fun = mean, geom = "bar", position = "dodge", width = 0.7) +
  stat_summary(
    fun.data = mean_cl_normal,
    geom = "errorbar",
    width = 0.2,
    position = position_dodge(width = 0.7)
  ) +
  labs(
    # The plotted variable is the *poor* sleep quality score, so the title
    # says so explicitly and agrees with the y-axis label.
    title = "Average Poor Sleep Quality Score by All-Nighter Status",
    x = "All-Nighter (0=No, 1=Yes)",
    y = "Average Poor Sleep Quality Score",
    # Without an explicit fill label the legend would be titled with the raw
    # expression "factor(AllNighter)".
    fill = "All-Nighter"
  ) +
  theme_minimal()

# Welch two-sample t-test, two-sided (the t.test() defaults).
t.test(PoorSleepQuality ~ factor(AllNighter), data = sleep_data_clean)
##
## Welch Two Sample t-test
##
## data: PoorSleepQuality by factor(AllNighter)
## t = -1.7068, df = 44.708, p-value = 0.09479
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -1.9456958 0.1608449
## sample estimates:
## mean in group 0 mean in group 1
## 6.136986 7.029412
# Q7: stress score, "Abstain" vs. "Heavy" alcohol use.
# Keep only the two alcohol-use categories being compared; rows with any
# other AlcoholUse value are dropped.
alcohol_stress_data <- filter(
  sleep_data_clean,
  AlcoholUse %in% c("Abstain", "Heavy")
)

# Bars show the group mean of StressScore; error bars come from
# mean_cl_normal.
alcohol_stress_data %>%
  ggplot(aes(AlcoholUse, StressScore, fill = AlcoholUse)) +
  stat_summary(geom = "bar", fun = mean, width = 0.7, position = "dodge") +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_cl_normal,
    width = 0.2,
    position = position_dodge(width = 0.7)
  ) +
  ggtitle("Average Stress Scores by Alcohol Use") +
  xlab("Alcohol Use") +
  ylab("Average Stress Score") +
  theme_minimal()

# Welch two-sample t-test, two-sided (the t.test() defaults).
alcohol_stress_data %>%
  t.test(StressScore ~ AlcoholUse, data = .)
##
## Welch Two Sample t-test
##
## data: StressScore by AlcoholUse
## t = -0.62604, df = 28.733, p-value = 0.5362
## alternative hypothesis: true difference in means between group Abstain and group Heavy is not equal to 0
## 95 percent confidence interval:
## -6.261170 3.327346
## sample estimates:
## mean in group Abstain mean in group Heavy
## 8.970588 10.437500
# Q8: drinks per week by gender.
# The 0/1 Gender code is relabelled once, in a temporary column that exists
# only inside this pipeline (sleep_data_clean itself is not modified), rather
# than repeating the factor() expression for both the x and fill aesthetics.
# Bars show the group mean of Drinks; error bars come from mean_cl_normal.
sleep_data_clean %>%
  mutate(
    GenderLabel = factor(Gender, labels = c("Female", "Male"))
  ) %>%
  ggplot(aes(x = GenderLabel, y = Drinks, fill = GenderLabel)) +
  stat_summary(fun = mean, geom = "bar", position = "dodge", width = 0.7) +
  stat_summary(
    fun.data = mean_cl_normal,
    geom = "errorbar",
    width = 0.2,
    position = position_dodge(width = 0.7)
  ) +
  labs(
    title = "Average Drinks Per Week by Gender",
    x = "Gender",
    y = "Average Drinks Per Week",
    # Give the legend a readable title instead of the aesthetic's expression.
    fill = "Gender"
  ) +
  theme_minimal()

# Welch two-sample t-test, two-sided (the t.test() defaults). The test uses
# the original 0/1 Gender column, so its groups are reported as 0 and 1.
t.test(Drinks ~ Gender, data = sleep_data_clean)
##
## Welch Two Sample t-test
##
## data: Drinks by Gender
## t = -6.1601, df = 142.75, p-value = 7.002e-09
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -4.360009 -2.241601
## sample estimates:
## mean in group 0 mean in group 1
## 4.238411 7.539216
# Q9: GPA, lower vs. higher weekday sleep.
# Split WeekdaySleep at 6 hours: values of 6 or less are "LowSleep", values
# above 6 are "HighSleep".
sleep_data_clean <- mutate(
  sleep_data_clean,
  SleepCategory = ifelse(WeekdaySleep <= 6, "LowSleep", "HighSleep")
)

# Bars show the group mean of GPA; error bars come from mean_cl_normal.
sleep_data_clean %>%
  ggplot(aes(SleepCategory, GPA, fill = SleepCategory)) +
  stat_summary(geom = "bar", fun = mean, width = 0.7, position = "dodge") +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_cl_normal,
    width = 0.2,
    position = position_dodge(width = 0.7)
  ) +
  ggtitle("Average GPA by Weekday Sleep Hours") +
  xlab("Weekday Sleep Category") +
  ylab("Average GPA") +
  theme_minimal()

# Welch two-sample t-test, two-sided (the t.test() defaults).
sleep_data_clean %>%
  t.test(GPA ~ SleepCategory, data = .)
##
## Welch Two Sample t-test
##
## data: GPA by SleepCategory
## t = -0.72752, df = 18.465, p-value = 0.476
## alternative hypothesis: true difference in means between group HighSleep and group LowSleep is not equal to 0
## 95 percent confidence interval:
## -0.2829106 0.1371778
## sample estimates:
## mean in group HighSleep mean in group LowSleep
## 3.238898 3.311765
# Q10: weekend sleep, first two class years vs. later years.
# Label ClassYear values of 2 or below "FirstTwoYears" and anything above 2
# "OtherYears". mutate() replaces any ClassYearGroup column already present
# in sleep_data_clean.
sleep_data_clean <- mutate(
  sleep_data_clean,
  ClassYearGroup = ifelse(ClassYear > 2, "OtherYears", "FirstTwoYears")
)

# Bars show the group mean of WeekendSleep; error bars come from
# mean_cl_normal.
sleep_data_clean %>%
  ggplot(aes(ClassYearGroup, WeekendSleep, fill = ClassYearGroup)) +
  stat_summary(geom = "bar", fun = mean, width = 0.7, position = "dodge") +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_cl_normal,
    width = 0.2,
    position = position_dodge(width = 0.7)
  ) +
  ggtitle("Average Weekend Sleep Hours by Class Year Group") +
  xlab("Class Year Group") +
  ylab("Average Weekend Sleep Hours") +
  theme_minimal()

# Welch two-sample t-test, two-sided (the t.test() defaults).
sleep_data_clean %>%
  t.test(WeekendSleep ~ ClassYearGroup, data = .)
##
## Welch Two Sample t-test
##
## data: WeekendSleep by ClassYearGroup
## t = -0.047888, df = 237.36, p-value = 0.9618
## alternative hypothesis: true difference in means between group FirstTwoYears and group OtherYears is not equal to 0
## 95 percent confidence interval:
## -0.3497614 0.3331607
## sample estimates:
## mean in group FirstTwoYears mean in group OtherYears
## 8.213592 8.221892