title: “Assignment 1 Q1” author: “Karim Pare” date: “2026-02-04” output: html_document

# Setup: make sure the required packages are available, attach them, and load
# the two Excel workbooks analysed in the rest of the document.

# Install only the packages that are not already present, so re-running or
# re-knitting the document does not reinstall them every time.
required_pkgs <- c("readxl", "ggpubr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(readxl)
library(ggpubr)
## Loading required package: ggplot2

# NOTE(review): these are absolute, user-specific paths; the script only runs
# on a machine where the workbooks exist at exactly these locations.
DatasetA <- read_excel("/Users/karim/Desktop/DatasetA.xlsx")
DatasetB <- read_excel("/Users/karim/Desktop/DatasetB.xlsx")

Running the stats

# Descriptive statistics: mean followed by standard deviation for each of the
# four variables. Each recorded result sits directly under the call that
# produced it.

# Dataset A: StudyHours
mean(DatasetA[["StudyHours"]])
## [1] 6.135609
sd(DatasetA[["StudyHours"]])
## [1] 1.369224

# Dataset A: ExamScore
mean(DatasetA[["ExamScore"]])
## [1] 90.06906
sd(DatasetA[["ExamScore"]])
## [1] 6.795224

# Dataset B: ScreenTime
mean(DatasetB[["ScreenTime"]])
## [1] 5.063296
sd(DatasetB[["ScreenTime"]])
## [1] 2.056833

# Dataset B: SleepingHours
mean(DatasetB[["SleepingHours"]])
## [1] 6.938459
sd(DatasetB[["SleepingHours"]])
## [1] 1.351332

Number 2: Shapiro-Wilk normality tests

# Shapiro-Wilk normality test for each of the four variables. Lines starting
# with "##" are the recorded console output of the call above them.
# The "data:" line in that output is the argument expression as written, so
# the calls are kept in the DatasetX$Column form.

# Dataset A, StudyHours: p = 0.9349 (> .05), normality is not rejected.
shapiro.test(DatasetA$StudyHours) 
## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetA$StudyHours
## W = 0.99388, p-value = 0.9349
# Dataset A, ExamScore: p = 0.006465 (< .05), normality is rejected.
shapiro.test(DatasetA$ExamScore) 
## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetA$ExamScore
## W = 0.96286, p-value = 0.006465
# Dataset B, ScreenTime: p = 1.914e-06 (< .05), normality is rejected.
shapiro.test(DatasetB$ScreenTime) 
## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetB$ScreenTime
## W = 0.90278, p-value = 1.914e-06
# Dataset B, SleepingHours: p = 0.3004 (> .05), normality is not rejected.
shapiro.test(DatasetB$SleepingHours)
## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetB$SleepingHours
## W = 0.98467, p-value = 0.3004

3: Histograms

Histograms for Dataset A

# Histograms for both Dataset A variables. StudyHours was previously missing
# even though it is summarised and tested for normality like the others.
# breaks = 20 is passed to hist() as the suggested number of bins.
hist(DatasetA$StudyHours, main = "Histogram of Study Hours", col = "lightblue", breaks = 20)

hist(DatasetA$ExamScore, main = "Histogram of Exam Scores", col = "lightgreen", breaks = 20)

Histograms for Dataset B

# Histograms for the two Dataset B variables, each with breaks = 20.
# The first argument stays in the DatasetB$Column form because hist() builds
# its default x-axis label from that expression.
hist(
  DatasetB$ScreenTime,
  breaks = 20,
  col = "pink",
  main = "Histogram of Screen Time"
)

hist(
  DatasetB$SleepingHours,
  breaks = 20,
  col = "lightyellow",
  main = "Histogram of Sleeping Hours"
)

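4 and 5: Correlation and Scatterplots

Spearman's rank correlation is used because the Shapiro-Wilk tests above gave p < .05 for ExamScore (Dataset A) and ScreenTime (Dataset B), so normality cannot be assumed for at least one variable in each dataset.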

# Spearman rank correlations for both datasets.
# exact = FALSE asks for the asymptotic p-value directly. The Dataset A data
# contain ties, so an exact p-value cannot be computed; the default call fell
# back to the same approximation and only added a warning.

# Dataset A: StudyHours vs. ExamScore
cor.test(DatasetA$StudyHours, DatasetA$ExamScore,
         method = "spearman", exact = FALSE)
## 
##  Spearman's rank correlation rho
## 
## data:  DatasetA$StudyHours and DatasetA$ExamScore
## S = 16518, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9008825

# Dataset B: ScreenTime vs. SleepingHours. This test was previously missing
# although the dataset has a scatterplot in this section.
# No output is recorded for this call yet; re-knit the document to generate it.
cor.test(DatasetB$ScreenTime, DatasetB$SleepingHours,
         method = "spearman", exact = FALSE)

Visualizing the relationships

# Scatterplots with a fitted regression line (add = "reg.line"), one per
# dataset, using readable axis labels in place of the raw column names.

# Dataset A: study hours against exam score
ggscatter(
  data = DatasetA,
  x = "StudyHours",
  y = "ExamScore",
  add = "reg.line",
  xlab = "Study Hours",
  ylab = "Exam Score"
)

# Dataset B: screen time against sleeping hours
ggscatter(
  data = DatasetB,
  x = "ScreenTime",
  y = "SleepingHours",
  add = "reg.line",
  xlab = "Screen Time",
  ylab = "Sleeping Hours"
)