---
title: "a4q1"
author: "koteswara vara prasad"
date: "2026-04-10"
output: html_document
---
library(readxl)
library(ggpubr)
## Loading required package: ggplot2
# Read the assignment workbook into `data`, then preview its first six rows.
# NOTE(review): the path is absolute and machine-specific; confirm it exists
# on the machine where this script is run.
data <- readxl::read_excel(
  path = "C:/Users/kvpra/Documents/spring 2026/rlaanguage/A4Q1.xlsx"
)
head(data, n = 6L)
## # A tibble: 6 × 2
## age education
## <dbl> <dbl>
## 1 42.0 13.2
## 2 38.0 12.6
## 3 16.4 10.3
## 4 33.8 16.2
## 5 33.4 14.0
## 6 14.3 11.4
# Open the spreadsheet-style viewer only when a person is at the console.
# View() needs an interactive data viewer, so calling it unconditionally can
# error (or pop up a stray window) when the script is knitted or run in batch.
if (interactive()) {
  View(data)
}

# Per-column summary: minimum, quartiles, median, mean, and maximum.
summary(data)
## age education
## Min. : 8.657 Min. : 6.196
## 1st Qu.:27.887 1st Qu.:12.024
## Median :35.798 Median :14.029
## Mean :35.326 Mean :13.827
## 3rd Qu.:42.549 3rd Qu.:15.851
## Max. :62.923 Max. :19.727
# Scatter plot of education against age with a fitted regression line
# overlaid, used to judge the form, direction, and strength of the relation.
ggscatter(
  data = data,
  x = "age", y = "education",
  xlab = "Age", ylab = "Years of education",
  add = "reg.line"
)

# The relationship is [linear].
# The relationship is [positive].
# The relationship is [moderate].
# There [are no] outliers.
# Descriptive statistics for age: mean, standard deviation, median.
mean(data[["age"]])
## [1] 35.32634
sd(data[["age"]])
## [1] 11.45344
median(data[["age"]])
## [1] 35.79811
# Descriptive statistics for education: mean, standard deviation, median.
mean(data[["education"]])
## [1] 13.82705
sd(data[["education"]])
## [1] 2.595901
median(data[["education"]])
## [1] 14.02915
# Histogram of age (20 requested breaks); title, axis, and label text are all
# drawn at a character expansion of 1.
hist(
  x = data[["age"]],
  breaks = 20,
  main = "Age Distribution",
  xlab = "Age",
  col = "lightblue", border = "white",
  cex.main = 1, cex.axis = 1, cex.lab = 1
)

# Histogram of years of education (20 requested breaks).
hist(
  x = data[["education"]],
  breaks = 20,
  main = "Education Distribution",
  xlab = "Years of Education",
  col = "lightgreen", border = "white"
)

# Variable 1: Age
# The first variable looks approximately normally distributed.
# The data is slightly positively skewed.
# The data does not have a perfect bell curve.
# Variable 2: Education
# The second variable, education, looks normally distributed.
# The data is symmetrical.
# The data has a proper bell curve.
# Shapiro-Wilk normality test for age. The argument is written as `data$age`
# because the test echoes that expression in the "data:" line of its output.
shapiro.test(data$age)
##
## Shapiro-Wilk normality test
##
## data: data$age
## W = 0.99194, p-value = 0.5581
# Shapiro-Wilk normality test for education (same reporting convention).
shapiro.test(data$education)
##
## Shapiro-Wilk normality test
##
## data: data$education
## W = 0.9908, p-value = 0.4385
# Variable 1: Age
# The first variable is normally distributed (p = .56).
# Variable 2: Education
# The second variable is normally distributed (p = .44).
# Pearson product-moment correlation between age and education
# (two-sided test of whether the true correlation differs from 0).
cor.test(data$age, data$education, method = "pearson")
##
## Pearson's product-moment correlation
##
## data: data$age and data$education
## t = 7.4066, df = 148, p-value = 9.113e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.3924728 0.6279534
## sample estimates:
## cor
## 0.5200256
# A Pearson correlation was conducted to test the relationship between age (M = 35.33, SD = 11.45) and education (M = 13.83, SD = 2.60).
# There was a statistically significant relationship between the two variables, r(148) = .52, p < .001.
# The relationship was positive and moderate.
# As age increased, education increased.