opening the installed packages
library(readxl)
library(ggpubr)
## Loading required package: ggplot2
importing data sets
# Read both Excel workbooks into data frames (read_excel() reads the first
# sheet when no sheet is given).
# NOTE(review): the paths are absolute and machine-specific; the script will
# only run where these files exist at exactly these locations.
datasetA <- read_excel(
  path = "C:/Users/pavan/Downloads/DatasetA.xlsx"
)
datasetB <- read_excel(
  path = "C:/Users/pavan/Downloads/DatasetB.xlsx"
)
datasetA
Calculate the Descriptive Statistics for datasetA
# Descriptive statistics for Dataset A: mean and sample standard deviation
# of each variable. Each call is left as its own top-level expression so the
# value is printed directly beneath it.

# StudyHours
mean(datasetA[["StudyHours"]])
## [1] 6.135609
sd(datasetA[["StudyHours"]])
## [1] 1.369224

# ExamScore
mean(datasetA[["ExamScore"]])
## [1] 90.06906
sd(datasetA[["ExamScore"]])
## [1] 6.795224
Create Histograms & Visually Check Normality
# Histograms of both Dataset A variables for a visual check of normality.
# The first argument is kept as `datasetA$<column>` because hist() derives its
# default x-axis label from that expression.

# StudyHours (red bars, white borders)
hist(
  datasetA$StudyHours,
  breaks = 10,          # suggested number of bins
  main = "StudyHours",
  col = "red",
  border = "white",
  cex.axis = 1,
  cex.lab = 1,
  cex.main = 1
)

# ExamScore (green bars, white borders)
hist(
  datasetA$ExamScore,
  breaks = 10,          # suggested number of bins
  main = "ExamScore",
  col = "green",
  border = "white",
  cex.axis = 1,
  cex.lab = 1,
  cex.main = 1
)
Statistically Test Normality
# Shapiro-Wilk normality test for each Dataset A variable. The null hypothesis
# is that the sample comes from a normal distribution, so a small p-value is
# evidence against normality.
shapiro.test(datasetA$StudyHours)
##
## Shapiro-Wilk normality test
##
## data: datasetA$StudyHours
## W = 0.99388, p-value = 0.9349
# StudyHours: p = 0.9349, so normality is not rejected at the conventional
# .05 level.
shapiro.test(datasetA$ExamScore)
##
## Shapiro-Wilk normality test
##
## data: datasetA$ExamScore
## W = 0.96286, p-value = 0.006465
# ExamScore: p = 0.006465, so normality is rejected at the .05 level. The
# correlation test that follows uses the rank-based Spearman method.
Conduct Correlation Test (Test Hypotheses)
# Spearman rank correlation between study hours and exam score
# (two-sided test of rho = 0).
# exact = FALSE: the data contain tied values, so cor.test() cannot compute an
# exact p-value and would fall back to the asymptotic approximation while
# emitting "Cannot compute exact p-value with ties". Requesting the
# approximation explicitly gives the same statistic and p-value without the
# warning.
cor.test(
  datasetA$StudyHours,
  datasetA$ExamScore,
  method = "spearman",
  exact = FALSE
)
##
## Spearman's rank correlation rho
##
## data: datasetA$StudyHours and datasetA$ExamScore
## S = 16518, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9008825
Create a Scatterplot to Visualize the Relationship
# Scatterplot of ExamScore against StudyHours with a fitted regression line
# (add = "reg.line") to visualise the direction of the relationship.
ggscatter(
  datasetA,
  x = "StudyHours",
  y = "ExamScore",
  add = "reg.line",
  xlab = "StudyHours",  # axis label spelled to match the column name
  ylab = "ExamScore"
)
Report the Results for datasetA: Study hours (M = 6.14, SD = 1.37) were correlated with exam score (M = 90.07, SD = 6.80), ρ(98) = .90, p < .001. The relationship was positive and strong. As study hours increased, exam score increased.
Dataset A (StudyHours → ExamScore) — Spearman
Independent variable (StudyHours): M = 6.14, SD = 1.37
Dependent variable (ExamScore): M = 90.07, SD = 6.80
Relationship between variables: There was a relationship between the variables.
Degrees of freedom (df): 98
Correlation value (ρ): 0.90
Exact p-value: p < 2.2e-16 (R reports only this upper bound, i.e. p < 0.00000000000000022; report as p < .001)
Direction & strength: The relationship was positive and strong.
Direction sentence: As study hours increased, exam score increased.
datasetB
Calculate the Descriptive Statistics for datasetB
# Descriptive statistics for Dataset B: mean and sample standard deviation
# of each variable. Each call is left as its own top-level expression so the
# value is printed directly beneath it.

# ScreenTime
mean(datasetB[["ScreenTime"]])
## [1] 5.063296
sd(datasetB[["ScreenTime"]])
## [1] 2.056833

# SleepingHours
mean(datasetB[["SleepingHours"]])
## [1] 6.938459
sd(datasetB[["SleepingHours"]])
## [1] 1.351332
Create Histograms & Visually Check Normality for datasetB
# Histograms of both Dataset B variables for a visual check of normality.
# The first argument is kept as `datasetB$<column>` because hist() derives its
# default x-axis label from that expression.

# ScreenTime (red bars, white borders)
hist(
  datasetB$ScreenTime,
  breaks = 10,          # suggested number of bins
  main = "ScreenTime",
  col = "red",
  border = "white",
  cex.axis = 1,
  cex.lab = 1,
  cex.main = 1
)

# SleepingHours (green bars, white borders)
hist(
  datasetB$SleepingHours,
  breaks = 10,          # suggested number of bins
  main = "SleepingHours",
  col = "green",
  border = "white",
  cex.axis = 1,
  cex.lab = 1,
  cex.main = 1
)
Statistically Test Normality for datasetB
# Shapiro-Wilk normality test for each Dataset B variable. The null hypothesis
# is that the sample comes from a normal distribution, so a small p-value is
# evidence against normality.
shapiro.test(datasetB$ScreenTime)
##
## Shapiro-Wilk normality test
##
## data: datasetB$ScreenTime
## W = 0.90278, p-value = 1.914e-06
# ScreenTime: p = 1.914e-06, so normality is rejected at the conventional
# .05 level. The correlation test that follows uses the rank-based Spearman
# method.
shapiro.test(datasetB$SleepingHours)
##
## Shapiro-Wilk normality test
##
## data: datasetB$SleepingHours
## W = 0.98467, p-value = 0.3004
# SleepingHours: p = 0.3004, so normality is not rejected at the .05 level.
Conduct Correlation Test (Test Hypotheses) for datasetB
# Spearman rank correlation between screen time and sleeping hours
# (two-sided test of rho = 0).
# NOTE(review): no ties warning appears in the recorded output, so the p-value
# here is presumably the exact one; confirm before changing `exact`.
cor.test(datasetB$ScreenTime, datasetB$SleepingHours, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: datasetB$ScreenTime and datasetB$SleepingHours
## S = 259052, p-value = 3.521e-09
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.5544674
Create a Scatterplot to Visualize the Relationship for datasetB
# Scatterplot of SleepingHours against ScreenTime with a fitted regression
# line (add = "reg.line") to visualise the direction of the relationship.
ggscatter(
  data = datasetB,
  x    = "ScreenTime",
  y    = "SleepingHours",
  xlab = "ScreenTime",
  ylab = "SleepingHours",
  add  = "reg.line"
)
Report the Results for datasetB: Screen time (M = 5.06, SD = 2.06) was
correlated with sleeping hours (M = 6.94, SD = 1.35), ρ(98) = -.55, p <
.001. The relationship was negative and strong. As screen time
increased, sleeping hours decreased.
Dataset B (ScreenTime → SleepingHours) — Spearman
Independent variable (ScreenTime): M = 5.06, SD = 2.06
Dependent variable (SleepingHours): M = 6.94, SD = 1.35
Relationship between variables: There was a relationship between the variables.
Degrees of freedom (df): 98
Correlation value (ρ): -0.55
Exact p-value: p = 0.000000003521
Direction & strength: The relationship was negative and strong.
Direction sentence: As screen time increased, sleeping hours decreased.