Introduction Loading Libraries into memory

library(readxl)
library(ggpubr)
## Loading required package: ggplot2

Loading Dataset from public directory

Replace Full Path with required path when running

# Read the worksheet into D1. The path is relative to the current working
# directory, so adjust it if the file lives elsewhere.
D1 <- read_excel("A4Q1.xlsx")

Loading Dataset from public directory

Plot the graph

# Scatter plot of education against age with a fitted regression line.
ggscatter(
  data = D1,
  x = "age",
  y = "education",
  xlab = "Independent Variable (age) /years",
  ylab = "Dependent Variable (education) /years",
  color = "#00A",
  shape = "triangle",
  add = "reg.line",
  # add.params styles only the added regression line, not the points.
  add.params = list(color = "red", size = 2)
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
##   Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

The relationship is [linear].

The relationship is [positive]. The relationship is [moderate]. There [are no] outliers.

Descriptive Statistics

# Summary statistics for the age column.
age_mean <- mean(D1[["age"]])
age_sd <- sd(D1[["age"]])
age_median <- median(D1[["age"]])

# cat() separates its arguments with a single space, so every label and
# value is passed as its own argument to keep the printed layout.
cat(
  "\n", "age mean = ", age_mean, "years",
  "\n", "age SD = ", age_sd, "years",
  "\n", "age median = ", age_median, "years",
  "\n"
)
## 
##  age mean =  35.32634 years 
##  age SD =  11.45344 years 
##  age median =  35.79811 years
# Summary statistics for the education column.
education_mean <- mean(D1[["education"]])
education_sd <- sd(D1[["education"]])
education_median <- median(D1[["education"]])

# cat() separates its arguments with a single space, so every label and
# value is passed as its own argument to keep the printed layout.
cat(
  "\n", "education mean = ", education_mean, "years",
  "\n", "education SD = ", education_sd, "years",
  "\n", "education median = ", education_median, "years",
  "\n"
)
## 
##  education mean =  13.82705 years 
##  education SD =  2.595901 years 
##  education median =  14.02915 years

Plotting normal distribution curves

age histogram

# Histogram of age showing raw counts per bin.
hist(
  D1$age,
  breaks = 30,
  freq = TRUE,
  main = "age",
  col = "lightblue",
  border = "white",
  cex.main = 1,
  cex.axis = 1,
  cex.lab = 1
)

# Same histogram on the density scale, so a density curve can be overlaid.
hist(
  D1$age,
  breaks = 30,
  freq = FALSE,
  main = "age",
  col = "lightblue",
  border = "white",
  cex.main = 1,
  cex.axis = 1,
  cex.lab = 1
)
# Normal density built from the sample mean and SD of age.
curve(dnorm(x, mean = age_mean, sd = age_sd), col = "red", lwd = 2, add = TRUE)
# Reference lines: mean (blue, dashed) and median (dark green, dotted).
abline(v = age_mean, lty = 2, lwd = 2, col = "blue")
abline(v = age_median, lty = 3, lwd = 2, col = "darkgreen")

education histogram

# Histogram of education showing raw counts per bin.
hist(
  D1$education,
  breaks = 20,
  freq = TRUE,
  main = "education",
  col = "lightcoral",
  border = "white",
  cex.main = 1,
  cex.axis = 1,
  cex.lab = 1
)

# Same histogram on the density scale, so a density curve can be overlaid.
hist(
  D1$education,
  breaks = 20,
  freq = FALSE,
  main = "education",
  col = "lightcoral",
  border = "white",
  cex.main = 1,
  cex.axis = 1,
  cex.lab = 1
)
# Normal density built from the sample mean and SD of education.
curve(
  dnorm(x, mean = education_mean, sd = education_sd),
  col = "red",
  lwd = 2,
  add = TRUE
)
# Reference lines: mean (blue, dashed) and median (dark green, dotted).
abline(v = education_mean, lty = 2, lwd = 2, col = "blue")
abline(v = education_median, lty = 3, lwd = 2, col = "darkgreen")

Symmetry Analysis

Variable 1: age The age looks normally distributed. The data is symmetrical. The data has a proper bell curve.

Variable 2: education The education looks normally distributed. The data is symmetrical. The data has a proper bell curve.

Normality statistics

# Shapiro-Wilk normality test on age; the result is stored so its p-value
# can be inspected afterwards.
st1 <- shapiro.test(D1$age)
print(st1)
## 
##  Shapiro-Wilk normality test
## 
## data:  D1$age
## W = 0.99194, p-value = 0.5581
# A p-value of at least 0.05 means normality is not rejected at the 5% level.
if (st1$p.value >= 0.05) {
  print("age data is normal")
} else {
  print("age data is abnormal")
}
## [1] "age data is normal"
# Shapiro-Wilk normality test on education; the result is stored so its
# p-value can be inspected afterwards.
st2 <- shapiro.test(D1$education)
print(st2)
## 
##  Shapiro-Wilk normality test
## 
## data:  D1$education
## W = 0.9908, p-value = 0.4385
# A p-value of at least 0.05 means normality is not rejected at the 5% level.
if (st2$p.value >= 0.05) {
  print("Education data is normal")
} else {
  print("Education data is abnormal")
}
## [1] "Education data is normal"
# Choose the correlation test from the two Shapiro-Wilk results.
# Pearson's correlation assumes both variables are approximately normal, so
# it is recommended only when neither test rejects normality at the 5% level;
# otherwise the rank-based Spearman correlation is recommended instead.
if (st1$p.value >= 0.05 && st2$p.value >= 0.05) {
  cat("\n", "pearsons test needed")
} else {
  cat("\n", "spearmans test needed")
}
## 
##  pearsons test needed

Normality test Passed

Data looks normally distributed

pearson correlation test

# Pearson product-moment correlation between age and education.
cor.test(x = D1$age, y = D1$education, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  D1$age and D1$education
## t = 7.4066, df = 148, p-value = 9.113e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3924728 0.6279534
## sample estimates:
##       cor 
## 0.5200256

Report results

A Pearson correlation was conducted to test the relationship between age (M = 35.326 years, SD = 11.453 years) and education (M = 13.827 years, SD = 2.596 years).

There was a statistically significant relationship between the two variables, r(148) = 0.5200, p < 0.001.

The relationship was positive and strong.

As age increased, education increased.