library(readxl)
library(ggpubr)
## Loading required package: ggplot2
Replace the file name below with the full path to the workbook if it is not in the working directory when running.
# Read the worksheet into D1. The path is relative to the working directory;
# the later steps read the `age` and `education` columns from it.
D1 <- read_excel("A4Q1.xlsx")
Plot the graph
# Scatter plot of education (y) against age (x) with a fitted regression line.
# `color` and `shape` style the plot, while `add.params` overrides the styling
# of the added regression line only (red, size 2).
ggscatter(
  data = D1, x = "age", y = "education",
  xlab = "Independent Variable (age) /years",
  ylab = "Dependent Variable (education) /years",
  color = "#00A", shape = "triangle",
  add = "reg.line",
  add.params = list(size = 2, color = "red")
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
## Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
The relationship is [positive]. The relationship is [moderate]. There [are no] outliers.
# Descriptive statistics for age. These three values are reused later when the
# normal curve and the reference lines are drawn over the age histogram.
age_mean   <- mean(D1[["age"]])
age_sd     <- sd(D1[["age"]])
age_median <- median(D1[["age"]])

# Print one statistic per output line. cat() joins its arguments with a single
# space, so the argument sequence is kept exactly as is to preserve the output.
cat(
  "\n", "age mean = ", age_mean, "years",
  "\n", "age SD = ", age_sd, "years",
  "\n", "age median = ", age_median, "years",
  "\n"
)
##
## age mean = 35.32634 years
## age SD = 11.45344 years
## age median = 35.79811 years
# Descriptive statistics for education. These three values are reused later
# when the normal curve and the reference lines are drawn over its histogram.
education_mean   <- mean(D1[["education"]])
education_sd     <- sd(D1[["education"]])
education_median <- median(D1[["education"]])

# Print one statistic per output line. cat() joins its arguments with a single
# space, so the argument sequence is kept exactly as is to preserve the output.
cat(
  "\n", "education mean = ", education_mean, "years",
  "\n", "education SD = ", education_sd, "years",
  "\n", "education median = ", education_median, "years",
  "\n"
)
##
## education mean = 13.82705 years
## education SD = 2.595901 years
## education median = 14.02915 years
# Age histogram, counts on the y-axis (freq = TRUE).
# The data argument stays written as D1$age because hist() derives its default
# x-axis label from that expression.
hist(
  D1$age,
  breaks = 30, freq = TRUE,
  main = "age",
  col = "lightblue", border = "white",
  cex.main = 1, cex.axis = 1, cex.lab = 1
)

# Age histogram again, this time on the density scale (freq = FALSE) so that a
# normal density curve can be overlaid on the same axes.
hist(
  D1$age,
  breaks = 30, freq = FALSE,
  main = "age",
  col = "lightblue", border = "white",
  cex.main = 1, cex.axis = 1, cex.lab = 1
)

# Overlay the normal density that has the sample mean and SD of age.
curve(
  dnorm(x, mean = age_mean, sd = age_sd),
  col = "red", lwd = 2,
  add = TRUE
)

# Vertical reference lines, drawn in order: mean (blue, dashed), then
# median (dark green, dotted).
abline(
  v = c(age_mean, age_median),
  col = c("blue", "darkgreen"),
  lwd = 2,
  lty = c(2, 3)
)
# Education histogram, counts on the y-axis (freq = TRUE).
# The data argument stays written as D1$education because hist() derives its
# default x-axis label from that expression.
hist(
  D1$education,
  breaks = 20, freq = TRUE,
  main = "education",
  col = "lightcoral", border = "white",
  cex.main = 1, cex.axis = 1, cex.lab = 1
)

# Education histogram again, this time on the density scale (freq = FALSE) so
# that a normal density curve can be overlaid on the same axes.
hist(
  D1$education,
  breaks = 20, freq = FALSE,
  main = "education",
  col = "lightcoral", border = "white",
  cex.main = 1, cex.axis = 1, cex.lab = 1
)

# Overlay the normal density that has the sample mean and SD of education.
curve(
  dnorm(x, mean = education_mean, sd = education_sd),
  col = "red", lwd = 2,
  add = TRUE
)

# Vertical reference lines, drawn in order: mean (blue, dashed), then
# median (dark green, dotted).
abline(
  v = c(education_mean, education_median),
  col = c("blue", "darkgreen"),
  lwd = 2,
  lty = c(2, 3)
)
Variable 1: age. The age data looks normally distributed. The data is symmetrical. The data has a proper bell curve.
Variable 2: education. The education data looks normally distributed. The data is symmetrical. The data has a proper bell curve.
# Shapiro-Wilk normality test on age. The result is stored in st1 so that its
# p-value can be inspected afterwards, then shown in full.
st1 <- shapiro.test(D1$age)
print(st1)
##
## Shapiro-Wilk normality test
##
## data: D1$age
## W = 0.99194, p-value = 0.5581
# Report the Shapiro-Wilk outcome for age at the 5% level: a p-value below 0.05
# is reported as abnormal, anything else as normal.
if (st1$p.value < 0.05) {
  print("age data is abnormal")
} else {
  print("age data is normal")
}
## [1] "age data is normal"
# Shapiro-Wilk normality test on education. The result is stored in st2 so that
# its p-value can be inspected afterwards, then shown in full.
st2 <- shapiro.test(D1$education)
print(st2)
##
## Shapiro-Wilk normality test
##
## data: D1$education
## W = 0.9908, p-value = 0.4385
# Report the Shapiro-Wilk outcome for education at the 5% level: a p-value
# below 0.05 is reported as abnormal, anything else as normal.
if (st2$p.value < 0.05) {
  print("Education data is abnormal")
} else {
  print("Education data is normal")
}
## [1] "Education data is normal"
# Pick the correlation test from the two Shapiro-Wilk results.
# Pearson's test is recommended only when neither variable rejects normality at
# the 5% level (the same threshold used in the per-variable reports). Otherwise
# Spearman's rank correlation, the usual non-parametric alternative, is
# recommended instead.
# The previous condition compared the two p-values for inequality, which says
# nothing about normality, and both branches recommended Pearson.
if (st1$p.value >= 0.05 && st2$p.value >= 0.05) {
  cat("\n", "pearsons test needed")
} else {
  cat("\n", "spearmans test needed")
}
##
## pearsons test needed
Data looks normally distributed
# Pearson product-moment correlation between age and education.
# The column expressions are kept as written because cor.test() uses them to
# build the "data:" line of the printed result.
cor.test(
  x = D1$age,
  y = D1$education,
  method = "pearson"
)
##
## Pearson's product-moment correlation
##
## data: D1$age and D1$education
## t = 7.4066, df = 148, p-value = 9.113e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3924728 0.6279534
## sample estimates:
## cor
## 0.5200256
A Pearson correlation was conducted to test the relationship between age (M = 35.326 years, SD = 11.453 years) and education (M = 13.827 years, SD = 2.596 years).
There was a statistically significant relationship between the two variables, r(148) = 0.5200, p < 0.001.
The relationship was positive and strong.
As age increased, education increased.