library(readxl)
library(ggpubr)
## Loading required package: ggplot2
# Load the assignment data from the Excel workbook.
# NOTE(review): absolute, user-specific path - the script only runs on a
# machine where this file exists at this exact location.
A4Q1 <- read_excel(path = "C:/Users/lahar/Downloads/A4Q1.xlsx")

# Scatter plot of education against age with a fitted regression line,
# used to judge linearity, direction, strength and outliers by eye.
ggscatter(data = A4Q1, x = "age", y = "education",
          xlab = "age", ylab = "education",
          add = "reg.line")

# The relationship is linear.

# The relationship is positive.

# The relationship is moderate or strong.

# There are no outliers
# Descriptive statistics for age: mean, standard deviation, median.
mean(A4Q1[["age"]])
## [1] 35.32634
sd(A4Q1[["age"]])
## [1] 11.45344
median(A4Q1[["age"]])
## [1] 35.79811

# Descriptive statistics for education: mean, standard deviation, median.
mean(A4Q1[["education"]])
## [1] 13.82705
sd(A4Q1[["education"]])
## [1] 2.595901
median(A4Q1[["education"]])
## [1] 14.02915
# Histogram of age for a visual check of the distribution's shape.
# breaks = 20 asks for about 20 bins (R treats a single number as a
# suggestion); the cex.* arguments set title, axis and label text scaling to 1.
hist(
  A4Q1$age,
  breaks = 20,
  main = "age",
  col = "lightblue",
  border = "white",
  cex.main = 1,
  cex.axis = 1,
  cex.lab = 1
)

# Variable 1: age
# The first variable looks normally distributed.
# The data is symmetrical.
# The data has a proper bell curve.

 # Histogram of education for a visual check of the distribution's shape.
 # breaks = 20 asks for about 20 bins (R treats a single number as a
 # suggestion); the cex.* arguments set title, axis and label text scaling to 1.
 hist(
   A4Q1$education,
   breaks = 20,
   main = "education",
   col = "lightcoral",
   border = "white",
   cex.main = 1,
   cex.axis = 1,
   cex.lab = 1
 )

      # Variable 2: education
       # The second variable looks normally distributed.
       # The data is symmetrical.
       # The data has a proper bell curve.
 
 # Shapiro-Wilk normality test for each variable. The null hypothesis is that
 # the sample comes from a normal distribution, so a p-value above .05 means
 # no significant departure from normality was detected.
 # The lines starting with "##" are the console output recorded from each call.
 shapiro.test(A4Q1$age)
## 
##  Shapiro-Wilk normality test
## 
## data:  A4Q1$age
## W = 0.99194, p-value = 0.5581
 shapiro.test(A4Q1$education)
## 
##  Shapiro-Wilk normality test
## 
## data:  A4Q1$education
## W = 0.9908, p-value = 0.4385
 # Variable 1: age
 # Age did not deviate significantly from a normal distribution (W = 0.99, p = .56).
 
 # Variable 2: education
 # Education did not deviate significantly from a normal distribution (W = 0.99, p = .44).
 
 # Pearson product-moment correlation between age and education.
 # The recorded output below ("##" lines) shows a two-sided test of the null
 # hypothesis that the true correlation is 0, with the t statistic, degrees of
 # freedom, p-value, 95% confidence interval and the sample estimate of r.
 cor.test(A4Q1$age, A4Q1$education, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  A4Q1$age and A4Q1$education
## t = 7.4066, df = 148, p-value = 9.113e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3924728 0.6279534
## sample estimates:
##       cor 
## 0.5200256
 # A Pearson correlation was conducted to test the relationship between age (M = 35.33, SD = 11.45) and education (M = 13.83, SD = 2.60).
 # There was a statistically significant relationship between the two variables, r(148) = .52, p < .001.
# The relationship was positive and strong.
# As age increased, education increased.