# R Markdown
library(readxl)
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)
# Read the A4Q1.xlsx workbook into data_age; the plot below and every later
# statistic in this script read from this object.
data_age <- read_excel(path = "A4Q1.xlsx")

# Scatter plot of education against age with a fitted regression line overlaid.
ggscatter(
  data = data_age,
  x = "age",
  y = "education",
  xlab = "age",
  ylab = "education",
  add = "reg.line"
)

# The relationship is linear.
# The relationship is positive.
# The relationship is moderate.
# There are outliers.
# Centre and spread of age: mean, standard deviation, then median.
# Lines starting with "##" are the values recorded when the report was run.
mean(data_age[["age"]])
## [1] 35.32634
sd(data_age[["age"]])
## [1] 11.45344
median(data_age[["age"]])
## [1] 35.79811

# The same three summaries for education.
mean(data_age[["education"]])
## [1] 13.82705
sd(data_age[["education"]])
## [1] 2.595901
median(data_age[["education"]])
## [1] 14.02915
# Histogram of age, used to judge the shape of its distribution by eye.
# xlab is set explicitly: when it is omitted, hist() labels the x axis with
# the deparsed argument expression ("data_age$age"). The label matches the
# one used for age in the scatter plot.
# breaks = 20 is only a suggestion to hist(); the actual breakpoints are
# rounded to "pretty" values.
hist(
  data_age$age,
  main = "Age",
  xlab = "age",
  breaks = 20,
  col = "lightblue",
  border = "white",
  cex.main = 1,
  cex.axis = 1,
  cex.lab = 1
)

# Histogram of education, used to judge the shape of its distribution by eye.
# xlab is set explicitly: when it is omitted, hist() labels the x axis with
# the deparsed argument expression ("data_age$education"). The label matches
# the one used for education in the scatter plot.
# breaks = 20 is only a suggestion to hist(); the actual breakpoints are
# rounded to "pretty" values.
hist(
  data_age$education,
  main = "Education",
  xlab = "education",
  breaks = 20,
  col = "lightcoral",
  border = "white",
  cex.main = 1,
  cex.axis = 1,
  cex.lab = 1
)

# Variable 1: Age
# The first variable looks normally distributed.
# The data is symmetrical.
# The data has a proper bell curve.
# Variable 2: Education
# The second variable looks normally distributed.
# The data is symmetrical.
# The data has a proper bell curve.
# Shapiro-Wilk normality test for age.
# The argument is kept as data_age$age so the "data:" line of the printed
# result matches the recorded output below.
shapiro.test(x = data_age$age)
##
## Shapiro-Wilk normality test
##
## data: data_age$age
## W = 0.99194, p-value = 0.5581
# Result for age: Shapiro-Wilk W = 0.99194, p-value = 0.5581.
# Shapiro-Wilk normality test for education.
# The argument is kept as data_age$education so the "data:" line of the
# printed result matches the recorded output below.
shapiro.test(x = data_age$education)
##
## Shapiro-Wilk normality test
##
## data: data_age$education
## W = 0.9908, p-value = 0.4385
# Result for education: Shapiro-Wilk W = 0.9908, p-value = 0.4385.
# There is no evidence to reject normality for either variable.
# Pearson product-moment correlation between age and education.
# alternative and conf.level spell out cor.test()'s defaults (two-sided test,
# 95% confidence interval), which is what the recorded output below reports.
cor.test(
  x = data_age$age,
  y = data_age$education,
  method = "pearson",
  alternative = "two.sided",
  conf.level = 0.95
)
##
## Pearson's product-moment correlation
##
## data: data_age$age and data_age$education
## t = 7.4066, df = 148, p-value = 9.113e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3924728 0.6279534
## sample estimates:
## cor
## 0.5200256
# A Pearson correlation was conducted to test the relationship between a person's age and education level.
# There was a statistically significant relationship between the two variables, r(148) = .52, p < .001, 95% CI [.39, .63].
# The relationship was positive and moderate.
# As age increased, education level tended to increase.