#Open the Installed Packages:
library("readxl")
library("ggpubr")
## Loading required package: ggplot2
# Read the Excel workbook and store its contents as `A4Q1`.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file exists.
A4Q1 <- readxl::read_excel(path = "C:/Users/krish/Downloads/A4Q1.xlsx")

# Scatterplot of education (y) against age (x) with a fitted regression line,
# used to judge the form, direction and strength of the relationship by eye.
ggscatter(
  A4Q1,
  x = "age",
  y = "education",
  add = "reg.line",
  xlab = "Age",
  # Label the y-axis after the plotted `education` column.
  ylab = "Education"
)

#Interpret the Scatterplot:
# "The relationship is linear".
# "The relationship is positive".
# "The relationship is moderate to strong".
# "There are outliers".

# Descriptive statistics: mean, standard deviation and median of each variable.
# `with()` evaluates each expression using the columns of A4Q1 directly.
with(A4Q1, mean(age))
## [1] 35.32634
with(A4Q1, sd(age))
## [1] 11.45344
with(A4Q1, median(age))
## [1] 35.79811
with(A4Q1, mean(education))
## [1] 13.82705
with(A4Q1, sd(education))
## [1] 2.595901
with(A4Q1, median(education))
## [1] 14.02915
# Visual normality check: one histogram per variable, each requesting 20 breaks.
# All three text magnification factors (title, axis, labels) are set to 1.
hist(
  x = A4Q1$age,
  breaks = 20,
  main = "age",
  border = "white",
  col = "lightblue",
  cex.axis = 1,
  cex.lab = 1,
  cex.main = 1
)

hist(
  x = A4Q1$education,
  breaks = 20,
  main = "education",
  border = "white",
  col = "lightcoral",
  cex.axis = 1,
  cex.lab = 1,
  cex.main = 1
)

# Interpret the Histograms:
# "Variable 1: age"
# "The first variable looks normally distributed".
# "The data is symmetrical".
# "The data has a proper bell curve".
# "Variable 2: education"
# "The second variable looks normally distributed".
# "The data is symmetrical".
# "The data has a proper bell curve".

# Statistical normality check: Shapiro-Wilk test on each variable.
# The columns are passed as `A4Q1$...` so the "data:" line of the printed
# result matches the recorded output.
shapiro.test(x = A4Q1$age)
## 
##  Shapiro-Wilk normality test
## 
## data:  A4Q1$age
## W = 0.99194, p-value = 0.5581
shapiro.test(x = A4Q1$education)
## 
##  Shapiro-Wilk normality test
## 
## data:  A4Q1$education
## W = 0.9908, p-value = 0.4385
#Interpret the Shapiro-Wilk Test:
# "Variable 1: age"
# "The first variable is normally distributed (p = 0.5581)".
# "Variable 2: education"
# "The second variable is normally distributed (p = 0.4385)".
# (Both p-values are above 0.05, so the test does not reject normality.)

# "Since both histograms look normal and both Shapiro-Wilk tests are non-significant, use a Pearson correlation".

# Pearson product-moment correlation between age and education.
# The alternative hypothesis and confidence level are written out at their
# default values (two-sided, 95%).
cor.test(
  x = A4Q1$age,
  y = A4Q1$education,
  method = "pearson",
  alternative = "two.sided",
  conf.level = 0.95
)
## 
##  Pearson's product-moment correlation
## 
## data:  A4Q1$age and A4Q1$education
## t = 7.4066, df = 148, p-value = 9.113e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3924728 0.6279534
## sample estimates:
##       cor 
## 0.5200256
#Report the Pearson Correlation:
# "A Pearson correlation was conducted to test the relationship between a person's age (M = 35.32634, SD = 11.45344) and education (M = 13.82705, SD = 2.595901)".
# "There was a statistically significant relationship between the two variables, r(148) = 0.52, p < 0.001".
# "The relationship was strong".
# "As age increased, education increased".