Assignment

#DataSetA

Load the Installed Packages

library(readxl)
library(ggpubr)

## Loading required package: ggplot2

Import the dataSet

# Read the DatasetA workbook from its absolute location on the local machine.
DatasetA <- read_excel(path = "C:/Users/cniti/Downloads/DatasetA.xlsx")

Calculate the Descriptive Statistics

# Mean and standard deviation of the StudyHours column.
mean(DatasetA[["StudyHours"]])

## [1] 6.135609

sd(DatasetA[["StudyHours"]])

## [1] 1.369224

# Mean and standard deviation of the ExamScore column.
mean(DatasetA[["ExamScore"]])

## [1] 90.06906

sd(DatasetA[["ExamScore"]])

## [1] 6.795224

Create Histograms & Visually Check Normality

# Histogram of StudyHours for a visual normality check.
# The data expression is kept as-is because the default axis label is
# derived from it.
hist(
  x = DatasetA$StudyHours,
  breaks = 20,
  main = "StudyHours",
  col = "lightblue",
  border = "white",
  cex.axis = 1,
  cex.lab = 1,
  cex.main = 1
)

# Histogram of ExamScore, drawn with the same settings but a different fill.
hist(
  x = DatasetA$ExamScore,
  breaks = 20,
  main = "ExamScore",
  col = "lightcoral",
  border = "white",
  cex.axis = 1,
  cex.lab = 1,
  cex.main = 1
)

Statistically Test Normality

# Shapiro-Wilk normality test on StudyHours.
# Recorded p-value (0.9349) is above .05.
shapiro.test(DatasetA$StudyHours)

## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetA$StudyHours
## W = 0.99388, p-value = 0.9349

# Shapiro-Wilk normality test on ExamScore.
# Recorded p-value (0.006465) is below .05, which is the reason the
# Spearman test is used in the interpretation further down.
shapiro.test(DatasetA$ExamScore)

## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetA$ExamScore
## W = 0.96286, p-value = 0.006465

Conduct Correlation Test (Test Hypotheses)

# Pearson product-moment correlation between StudyHours and ExamScore.
# Run for comparison; the write-up reports the Spearman result instead.
cor.test(DatasetA$StudyHours, DatasetA$ExamScore, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  DatasetA$StudyHours and DatasetA$ExamScore
## t = 20.959, df = 98, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8606509 0.9346369
## sample estimates:
##      cor 
## 0.904214

# Spearman rank correlation between StudyHours and ExamScore.
# The warning recorded below states that an exact p-value cannot be
# computed because the data contain ties.
cor.test(DatasetA$StudyHours, DatasetA$ExamScore, method = "spearman")

## Warning in cor.test.default(DatasetA$StudyHours, DatasetA$ExamScore, method =
## "spearman"): Cannot compute exact p-value with ties

## 
##  Spearman's rank correlation rho
## 
## data:  DatasetA$StudyHours and DatasetA$ExamScore
## S = 16518, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.9008825

Create a Scatterplot to Visualize the Relationship

# Scatterplot of ExamScore against StudyHours with a regression line added.
ggscatter(
  data = DatasetA,
  x = "StudyHours", xlab = "StudyHours",
  y = "ExamScore", ylab = "ExamScore",
  add = "reg.line"
)

#DataSetA Correlation Test and Interpretation

The Spearman Correlation test was selected because ExamScore failed the Shapiro-Wilk normality test (p = 0.006 < .05), meaning at least one variable was not normally distributed.

The p-value for the Spearman correlation is less than .05 (p < .001), which means the result is statistically significant. The alternative hypothesis is supported.

The rho value is 0.90. The correlation is positive, meaning as study hours increase, exam scores increase.

The correlation value is greater than 0.50, which indicates a strong relationship between study hours and exam scores.

#DatasetA (Spearman Correlation Results)

Study hours (M = 6.14, SD = 1.37) was correlated with exam score (M = 90.07, SD = 6.80), ρ(98) = .90, p < .001. The relationship was positive and strong. As study hours increased, exam scores increased.

#DataSetB

Import the DataSet

# Read the DatasetB workbook from its absolute location on the local machine.
DatasetB <- read_excel(path = "C:/Users/cniti/Downloads/DatasetB.xlsx")

Calculate the Descriptive Statistics

# Mean and standard deviation of the ScreenTime column.
mean(DatasetB[["ScreenTime"]])

## [1] 5.063296

sd(DatasetB[["ScreenTime"]])

## [1] 2.056833

# Mean and standard deviation of the SleepingHours column.
mean(DatasetB[["SleepingHours"]])

## [1] 6.938459

sd(DatasetB[["SleepingHours"]])

## [1] 1.351332

Create Histograms & Visually Check Normality

# Histogram of ScreenTime for a visual normality check.
# The data expression is kept as-is because the default axis label is
# derived from it.
hist(
  x = DatasetB$ScreenTime,
  breaks = 10,
  main = "ScreenTime",
  col = "green",
  border = "white",
  cex.axis = 1,
  cex.lab = 1,
  cex.main = 1
)

# Histogram of SleepingHours, drawn with the same settings but a different fill.
hist(
  x = DatasetB$SleepingHours,
  breaks = 10,
  main = "SleepingHours",
  col = "orange",
  border = "white",
  cex.axis = 1,
  cex.lab = 1,
  cex.main = 1
)

Statistically Test Normality

# Shapiro-Wilk normality test on ScreenTime.
# Recorded p-value (1.914e-06) is below .05, which is the reason the
# Spearman test is used in the interpretation further down.
shapiro.test(DatasetB$ScreenTime)

## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetB$ScreenTime
## W = 0.90278, p-value = 1.914e-06

# Shapiro-Wilk normality test on SleepingHours.
# Recorded p-value (0.3004) is above .05.
shapiro.test(DatasetB$SleepingHours)

## 
##  Shapiro-Wilk normality test
## 
## data:  DatasetB$SleepingHours
## W = 0.98467, p-value = 0.3004

Conduct Correlation Test (Test Hypotheses)

# Pearson product-moment correlation between ScreenTime and SleepingHours.
# Run for comparison; its estimate (-0.64) differs from the Spearman
# estimate (-0.55) recorded below, so the two must not be mixed up when
# reporting.
cor.test(DatasetB$ScreenTime, DatasetB$SleepingHours, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  DatasetB$ScreenTime and DatasetB$SleepingHours
## t = -8.2538, df = 98, p-value = 7.27e-13
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7433008 -0.5078341
## sample estimates:
##        cor 
## -0.6403761

# Spearman rank correlation between ScreenTime and SleepingHours.
cor.test(DatasetB$ScreenTime, DatasetB$SleepingHours, method = "spearman")

## 
##  Spearman's rank correlation rho
## 
## data:  DatasetB$ScreenTime and DatasetB$SleepingHours
## S = 259052, p-value = 3.521e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.5544674

Create a Scatterplot to Visualize the Relationship

# Scatterplot of SleepingHours against ScreenTime with a regression line
# added (add = "reg.line").
ggscatter(
  DatasetB,
  x = "ScreenTime",
  y = "SleepingHours",
  add = "reg.line",
  xlab = "ScreenTime",  # axis label typo fixed (previously "ScreenTome")
  ylab = "SleepingHours"
)

#DataSetB Correlation Test and Interpretation

The Spearman Correlation test was selected because ScreenTime failed the Shapiro-Wilk normality test (p < .05), meaning at least one variable was not normally distributed.

The p-value for the Spearman correlation is less than .05 (p < .001), which means the result is statistically significant. The alternative hypothesis is supported.

The rho value is -0.55. The correlation is negative, meaning as screen time increases, sleeping hours decrease.

The correlation value falls between -0.50 and -1.00, which indicates a strong negative relationship between screen time and sleeping hours.

#Spearman Correlation Results (DatasetB)

Screen time (M = 5.06, SD = 2.06) was correlated with sleeping hours (M = 6.94, SD = 1.35), ρ(98) = -.55, p < .001. The relationship was negative and strong. As screen time increased, sleeping hours decreased.

Assignment_4

Nitin

2026-02-09