library("readxl")
library("ggpubr")
## Loading required package: ggplot2
# Read the two Excel workbooks used in the analyses below.
# The paths are absolute, so the files must exist at exactly these locations.
DataSetA <- readxl::read_excel(
  path = "/Users/komakechivan/Downloads/DatasetA.xlsx"
)
DataSetB <- readxl::read_excel(
  path = "/Users/komakechivan/Downloads/DatasetB.xlsx"
)
# Dataset A, independent variable (StudyHours): mean and standard deviation
mean(x = DataSetA$StudyHours)
## [1] 6.135609
stats::sd(x = DataSetA$StudyHours)
## [1] 1.369224
# Dataset A, dependent variable (ExamScore): mean and standard deviation
mean(x = DataSetA$ExamScore)
## [1] 90.06906
stats::sd(x = DataSetA$ExamScore)
## [1] 6.795224
# Histogram of StudyHours (Dataset A); breaks = 20 requests roughly 20 bins.
hist(
  DataSetA$StudyHours,
  breaks = 20,
  main = "Study Hours",
  border = "white",
  col = "lightgreen",
  cex.lab = 1,
  cex.axis = 1,
  cex.main = 1
)
# Histogram of ExamScore (Dataset A), same binning and text scaling.
hist(
  DataSetA$ExamScore,
  breaks = 20,
  main = "Exam Score",
  border = "white",
  col = "lightblue",
  cex.lab = 1,
  cex.axis = 1,
  cex.main = 1
)
# Shapiro-Wilk normality check for each Dataset A variable.
# Recorded output: StudyHours p = 0.9349, ExamScore p = 0.006465.
stats::shapiro.test(x = DataSetA$StudyHours)
##
## Shapiro-Wilk normality test
##
## data: DataSetA$StudyHours
## W = 0.99388, p-value = 0.9349
stats::shapiro.test(x = DataSetA$ExamScore)
##
## Shapiro-Wilk normality test
##
## data: DataSetA$ExamScore
## W = 0.96286, p-value = 0.006465
# Spearman rank correlation between StudyHours and ExamScore.
# exact = FALSE requests the asymptotic p-value instead of the exact one.
cor_test_A <- stats::cor.test(
  x = DataSetA$StudyHours,
  y = DataSetA$ExamScore,
  exact = FALSE,
  method = "spearman"
)
print(cor_test_A)
##
## Spearman's rank correlation rho
##
## data: DataSetA$StudyHours and DataSetA$ExamScore
## S = 16518, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9008825
# We use ggscatter to easily include the regression line and correlation stats
# Scatter plot of ExamScore against StudyHours (Dataset A) with a fitted
# regression line and its confidence band.
# cor.coef = TRUE is what actually draws the correlation coefficient and
# p-value on the plot; cor.method only selects which coefficient is computed
# and has no visible effect while cor.coef is left at its default (FALSE).
# NOTE(review): with tied values the Spearman annotation may emit an
# "exact p-value" warning from the underlying test -- confirm when rendering.
ggscatter(
  DataSetA,
  x = "StudyHours", y = "ExamScore",
  add = "reg.line", conf.int = TRUE,
  cor.coef = TRUE, cor.method = "spearman",
  main = "Relationship: Study Hours vs Exam Score",
  xlab = "Hours Spent Studying", ylab = "Exam Score (%)",
  color = "darkgreen", shape = 21, fill = "lightgreen"
)
# Dataset B, independent variable (ScreenTime): mean and standard deviation
mean(x = DataSetB$ScreenTime)
## [1] 5.063296
stats::sd(x = DataSetB$ScreenTime)
## [1] 2.056833
# Dataset B, dependent variable (SleepingHours): mean and standard deviation
mean(x = DataSetB$SleepingHours)
## [1] 6.938459
stats::sd(x = DataSetB$SleepingHours)
## [1] 1.351332
# Histogram of ScreenTime (Dataset B); breaks = 20 requests roughly 20 bins.
hist(
  DataSetB$ScreenTime,
  breaks = 20,
  main = "Screen Time",
  border = "cyan",
  col = "lightcyan",
  cex.lab = 1,
  cex.axis = 1,
  cex.main = 1
)
# Histogram of SleepingHours (Dataset B), same binning and text scaling.
hist(
  DataSetB$SleepingHours,
  breaks = 20,
  main = "Sleeping Hours",
  border = "black",
  col = "gray",
  cex.lab = 1,
  cex.axis = 1,
  cex.main = 1
)
# Shapiro-Wilk normality check for each Dataset B variable.
# Recorded output: ScreenTime p = 1.914e-06, SleepingHours p = 0.3004.
stats::shapiro.test(x = DataSetB$ScreenTime)
##
## Shapiro-Wilk normality test
##
## data: DataSetB$ScreenTime
## W = 0.90278, p-value = 1.914e-06
stats::shapiro.test(x = DataSetB$SleepingHours)
##
## Shapiro-Wilk normality test
##
## data: DataSetB$SleepingHours
## W = 0.98467, p-value = 0.3004
# Spearman rank correlation between ScreenTime and SleepingHours.
# exact = FALSE requests the asymptotic p-value instead of the exact one.
cor_test_B <- stats::cor.test(
  x = DataSetB$ScreenTime,
  y = DataSetB$SleepingHours,
  exact = FALSE,
  method = "spearman"
)
print(cor_test_B)
##
## Spearman's rank correlation rho
##
## data: DataSetB$ScreenTime and DataSetB$SleepingHours
## S = 259052, p-value = 2.161e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5544674
# Scatter plot of SleepingHours against ScreenTime (Dataset B) with a fitted
# regression line and its confidence band.
# cor.coef = TRUE is what actually draws the correlation coefficient and
# p-value on the plot; cor.method only selects which coefficient is computed
# and has no visible effect while cor.coef is left at its default (FALSE).
# NOTE(review): with tied values the Spearman annotation may emit an
# "exact p-value" warning from the underlying test -- confirm when rendering.
ggscatter(
  DataSetB,
  x = "ScreenTime", y = "SleepingHours",
  add = "reg.line", conf.int = TRUE,
  cor.coef = TRUE, cor.method = "spearman",
  main = "Relationship: Screen Time vs Sleeping Hours",
  xlab = "Daily Screen Time (Hours)", ylab = "Total Sleep (Hours)",
  color = "darkblue", shape = 21, fill = "lightblue"
)