Part 1: Import and Descriptive Statistics

library(readxl)
library(ggpubr)

## Loading required package: ggplot2

# Directory holding the assignment workbooks. Defaults to the original
# location; set the ASSIGNMENT_DATA_DIR environment variable to run the
# analysis on another machine without editing the script.
data_dir <- Sys.getenv("ASSIGNMENT_DATA_DIR", unset = "/Users/asfia/Desktop")

# DatasetA supplies StudyHours and ExamScore; DatasetB supplies ScreenTime
# and SleepingHours (the columns used in the analysis below).
DatasetA <- read_excel(file.path(data_dir, "DatasetA.xlsx"))
DatasetB <- read_excel(file.path(data_dir, "DatasetB.xlsx"))

# Descriptive statistics for StudyHours: mean first, then standard deviation
mean(DatasetA$StudyHours)
sd(DatasetA$StudyHours)

## [1] 6.135609

## [1] 1.369224

# Descriptive statistics for ExamScore: mean first, then standard deviation
mean(DatasetA$ExamScore)
sd(DatasetA$ExamScore)

## [1] 90.06906

## [1] 6.795224

# Descriptive statistics for ScreenTime: mean first, then standard deviation
mean(DatasetB$ScreenTime)
sd(DatasetB$ScreenTime)

## [1] 5.063296

## [1] 2.056833

# Descriptive statistics for SleepingHours: mean first, then standard deviation
mean(DatasetB$SleepingHours)
sd(DatasetB$SleepingHours)

## [1] 6.938459

## [1] 1.351332

Part 2: Normality Check

# Shapiro-Wilk normality test for StudyHours (p < .05 is read as non-normal).
# The "data:" line of the printed result echoes the argument expression.
shapiro.test(DatasetA$StudyHours)

## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetA$StudyHours
## W = 0.99388, p-value = 0.9349

# Shapiro-Wilk normality test for ExamScore (p < .05 is read as non-normal).
# The "data:" line of the printed result echoes the argument expression.
shapiro.test(DatasetA$ExamScore)

## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetA$ExamScore
## W = 0.96286, p-value = 0.006465

# Shapiro-Wilk normality test for ScreenTime (p < .05 is read as non-normal).
# The "data:" line of the printed result echoes the argument expression.
shapiro.test(DatasetB$ScreenTime)

## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetB$ScreenTime
## W = 0.90278, p-value = 1.914e-06

# Shapiro-Wilk normality test for SleepingHours (p < .05 is read as non-normal).
# The "data:" line of the printed result echoes the argument expression.
shapiro.test(DatasetB$SleepingHours)

## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetB$SleepingHours
## W = 0.98467, p-value = 0.3004

Part 3: Histograms to Visually Check Normality

# Histograms for Dataset A
# xlab is set explicitly so the x-axis shows a readable label instead of the
# raw expression (e.g. "DatasetA$StudyHours") that hist() uses by default.
hist(
  DatasetA$StudyHours,
  main = "Histogram of Study Hours",
  xlab = "Study Hours",
  col = "lightblue",
  breaks = 20
)

hist(
  DatasetA$ExamScore,
  main = "Histogram of Exam Scores",
  xlab = "Exam Score",
  col = "lightgreen",
  breaks = 20
)

# Histograms for Dataset B
# xlab is set explicitly so the x-axis shows a readable label instead of the
# raw expression (e.g. "DatasetB$ScreenTime") that hist() uses by default.
hist(
  DatasetB$ScreenTime,
  main = "Histogram of Screen Time",
  xlab = "Screen Time",
  col = "pink",
  breaks = 20
)

hist(
  DatasetB$SleepingHours,
  main = "Histogram of Sleeping Hours",
  xlab = "Sleeping Hours",
  col = "lightyellow",
  breaks = 20
)

# Parts 4 & 5: Correlation and Scatterplots

# ExamScore departs from normality (Shapiro-Wilk p = .006), so Spearman's
# rank correlation is used instead of Pearson's r.
# exact = FALSE requests the asymptotic p-value up front: these data contain
# ties, so an exact p-value cannot be computed and cor.test() would otherwise
# fall back to the same approximation while emitting a warning.
cor.test(
  DatasetA$StudyHours,
  DatasetA$ExamScore,
  method = "spearman",
  exact = FALSE
)

## 
##  Spearman's rank correlation rho
## 
## data:  DatasetA$StudyHours and DatasetA$ExamScore
## S = 16518, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9008825

# Spearman's rank correlation between ScreenTime and SleepingHours
cor.test(
  x = DatasetB$ScreenTime,
  y = DatasetB$SleepingHours,
  method = "spearman"
)

## 
##  Spearman's rank correlation rho
## 
## data:  DatasetB$ScreenTime and DatasetB$SleepingHours
## S = 259052, p-value = 3.521e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.5544674

# Scatterplot of ExamScore against StudyHours with a regression line overlaid
ggscatter(
  data = DatasetA,
  x = "StudyHours",
  y = "ExamScore",
  xlab = "Study Hours",
  ylab = "Exam Score",
  add = "reg.line"
)

# Scatterplot of SleepingHours against ScreenTime with a regression line overlaid
ggscatter(
  data = DatasetB,
  x = "ScreenTime",
  y = "SleepingHours",
  xlab = "Screen Time",
  ylab = "Sleeping Hours",
  add = "reg.line"
)

Part 6: Final Interpretation

The analysis found a strong positive correlation between Study Hours and Exam Scores (\(\rho = 0.90\), \(p < .001\)), meaning that students who studied more tended to score higher. Conversely, there was a moderate negative correlation between Screen Time and Sleeping Hours (\(\rho = -0.55\), \(p < .001\)), indicating that more screen time was associated with fewer hours of sleep. Spearman’s rank correlation was used because one variable in each pair departed from normality in the Shapiro-Wilk test (Exam Score, \(p = .006\); Screen Time, \(p < .001\)).

Assignment 4

Asfiya Khanam

2026-02-02

Part 1: Import and Descriptive Statistics

Part 2: Normality Check

Part 3: Histograms to Visually Check Normality