library("readxl")
library("ggpubr")
## Loading required package: ggplot2
# Load the two study datasets ----
# The folder defaults to the original author's Downloads directory. Set the
# DATASET_DIR environment variable to read the workbooks from another location
# without editing this script. An unset or empty variable falls back to the
# default so existing runs behave exactly as before.
data_dir <- Sys.getenv("DATASET_DIR", unset = "")
if (!nzchar(data_dir)) {
  data_dir <- "/Users/komakechivan/Downloads"
}
DataSetA <- read_excel(file.path(data_dir, "DatasetA.xlsx"))
DataSetB <- read_excel(file.path(data_dir, "DatasetB.xlsx"))
# Descriptive statistics ----
# Mean and standard deviation of each variable; the `##` lines record the
# values printed when the script was run.

# Dataset A
# Independent variable: StudyHours
mean(DataSetA[["StudyHours"]])
## [1] 6.135609
sd(DataSetA[["StudyHours"]])
## [1] 1.369224
# Dependent variable: ExamScore
mean(DataSetA[["ExamScore"]])
## [1] 90.06906
sd(DataSetA[["ExamScore"]])
## [1] 6.795224

# Dataset B
# Independent variable: ScreenTime
mean(DataSetB[["ScreenTime"]])
## [1] 5.063296
sd(DataSetB[["ScreenTime"]])
## [1] 2.056833
# Dependent variable: SleepingHours
mean(DataSetB[["SleepingHours"]])
## [1] 6.938459
sd(DataSetB[["SleepingHours"]])
## [1] 1.351332
# Histograms ----

# Draw one histogram using the formatting shared by every plot in this section.
#   values   numeric vector to plot
#   title    plot title (passed to `main`)
#   x_label  x-axis label; without an explicit label hist() falls back to the
#            R expression that was passed in (e.g. "DataSetA$StudyHours")
#   fill     bar fill colour
#   outline  bar border colour
# Returns the value of hist() invisibly, so nothing extra is printed.
plot_histogram <- function(values, title, x_label, fill, outline) {
  hist(
    values,
    main = title,
    xlab = x_label,
    breaks = 20,
    col = fill,
    border = outline,
    cex.main = 1,
    cex.axis = 1,
    cex.lab = 1
  )
}

# Dataset A
plot_histogram(
  DataSetA$StudyHours,
  title = "Study Hours",
  x_label = "Hours Spent Studying",
  fill = "lightgreen",
  outline = "white"
)
# The variable "Study Hours" appears normally distributed. The data looks
# symmetrical, with most of the data in the middle.
plot_histogram(
  DataSetA$ExamScore,
  title = "Exam Score",
  x_label = "Exam Score (%)",
  fill = "lightblue",
  outline = "white"
)
# The variable "Exam Score" does not appear normally distributed. The data
# looks negatively skewed, with most of the data to the right.

# Dataset B
plot_histogram(
  DataSetB$ScreenTime,
  title = "Screen Time",
  x_label = "Daily Screen Time (Hours)",
  fill = "lightcyan",
  outline = "cyan"
)
# The variable "Screen Time" does not appear normally distributed. The data
# looks positively skewed, with most of the data to the left.
plot_histogram(
  DataSetB$SleepingHours,
  title = "Sleeping Hours",
  x_label = "Total Sleep (Hours)",
  fill = "gray",
  outline = "black"
)
# NOTE(review): no visual interpretation was recorded for "Sleeping Hours".
# The Shapiro-Wilk result recorded in this script (p = 0.3004) does not reject
# normality; confirm that the histogram shape agrees.
# Shapiro-Wilk normality tests ----
# Decision rule used in this script: p < 0.05 means normality is rejected for
# that variable; p >= 0.05 means the test found no evidence against normality.
# The `##` lines record the output printed when the script was run.
# Dataset A
shapiro.test(DataSetA$StudyHours)
##
## Shapiro-Wilk normality test
##
## data: DataSetA$StudyHours
## W = 0.99388, p-value = 0.9349
# StudyHours: p = 0.9349 (>= 0.05), so normality is not rejected.
shapiro.test(DataSetA$ExamScore)
##
## Shapiro-Wilk normality test
##
## data: DataSetA$ExamScore
## W = 0.96286, p-value = 0.006465
# ExamScore: p = 0.006465 (< 0.05), so normality is rejected. This agrees with
# the negative skew noted for the Exam Score histogram and is the reason a
# Spearman correlation is used for Dataset A.
# Dataset B
shapiro.test(DataSetB$ScreenTime)
##
## Shapiro-Wilk normality test
##
## data: DataSetB$ScreenTime
## W = 0.90278, p-value = 1.914e-06
# ScreenTime: p = 1.914e-06 (< 0.05), so normality is rejected; this is the
# reason a Spearman correlation is used for Dataset B.
shapiro.test(DataSetB$SleepingHours)
##
## Shapiro-Wilk normality test
##
## data: DataSetB$SleepingHours
## W = 0.98467, p-value = 0.3004
# SleepingHours: p = 0.3004 (>= 0.05), so normality is not rejected.
### Research Question 1: Study Hours vs Exam Score
# Spearman's rank correlation is used because ExamScore showed skewness (and
# failed the Shapiro-Wilk test), so the normality assumption behind Pearson's
# correlation is not met.
# exact = FALSE requests the approximate p-value instead of an exact one;
# presumably chosen to avoid the warning R gives for tied ranks - confirm.
cor_test_A <- cor.test(DataSetA$StudyHours, DataSetA$ExamScore,
method = "spearman", exact = FALSE)
print(cor_test_A)
##
## Spearman's rank correlation rho
##
## data: DataSetA$StudyHours and DataSetA$ExamScore
## S = 16518, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9008825
# Recorded result: rho = 0.90 with p < 2.2e-16, i.e. a statistically
# significant positive monotonic association between study hours and exam score.
### Research Question 2: Screen Time vs Sleep
# Spearman's rank correlation is used because ScreenTime showed skewness (and
# failed the Shapiro-Wilk test).
cor_test_B <- cor.test(DataSetB$ScreenTime, DataSetB$SleepingHours,
method = "spearman", exact = FALSE)
print(cor_test_B)
##
## Spearman's rank correlation rho
##
## data: DataSetB$ScreenTime and DataSetB$SleepingHours
## S = 259052, p-value = 2.161e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5544674
# Recorded result: rho = -0.55 with p = 2.161e-09, i.e. a statistically
# significant negative monotonic association between screen time and sleep.
# Scatterplots ----
# ggscatter() draws the points, a fitted linear regression line with its
# confidence band and, because cor.coef = TRUE, the correlation coefficient
# and p-value on the plot. cor.method is only used when cor.coef = TRUE, so
# without that flag no correlation statistics are displayed.
# cor.coef.name = "rho" labels the coefficient as Spearman's rho rather than
# the default "R".
# Note: the regression line is a linear fit shown as a visual guide; the
# reported statistic is the rank-based Spearman correlation.

# Dataset A scatterplot
ggscatter(
  DataSetA,
  x = "StudyHours", y = "ExamScore",
  add = "reg.line", conf.int = TRUE,
  cor.coef = TRUE, cor.method = "spearman",
  cor.coeff.args = list(cor.coef.name = "rho"),
  main = "Relationship: Study Hours vs Exam Score",
  xlab = "Hours Spent Studying", ylab = "Exam Score (%)",
  color = "darkgreen", shape = 21, fill = "lightgreen"
)

# Dataset B scatterplot
ggscatter(
  DataSetB,
  x = "ScreenTime", y = "SleepingHours",
  add = "reg.line", conf.int = TRUE,
  cor.coef = TRUE, cor.method = "spearman",
  cor.coeff.args = list(cor.coef.name = "rho"),
  main = "Relationship: Screen Time vs Sleeping Hours",
  xlab = "Daily Screen Time (Hours)", ylab = "Total Sleep (Hours)",
  color = "darkblue", shape = 21, fill = "lightblue"
)
# Results, Dataset A: Students studied for an average of 6.14 hours
# (SD = 1.37), while the average exam score was 90.07% (SD = 6.80). A
# Spearman's rank correlation was conducted to assess the relationship between
# study hours and exam score. The analysis revealed a statistically
# significant, very strong positive relationship (rho(98) = 0.90, p < .001).
# This indicates that as study hours increase, exam scores tend to increase
# as well.
# Results, Dataset B: Participants averaged 5.06 hours of screen time
# (SD = 2.06) and 6.94 hours of sleep (SD = 1.35). A Spearman's rank
# correlation was conducted to assess the relationship between screen time and
# sleeping hours. The analysis revealed a statistically significant, moderate
# negative relationship (rho(98) = -0.55, p < .001). This indicates that as
# screen time increases, sleeping hours tend to decrease.