Assign1.knit

title: “a4q1”

author: “koteswara vara prasad”

date: “2026-04-10”

output: html_document

library(readxl)
library(ggpubr)

## Loading required package: ggplot2

# Read the assignment workbook into a tibble and preview its first six rows.
# NOTE(review): the path is absolute and user-specific; the file must exist at
# exactly this location for the document to knit.
data <- readxl::read_excel(
  path = "C:/Users/kvpra/Documents/spring 2026/rlaanguage/A4Q1.xlsx"
)
head(data, n = 6L)

## # A tibble: 6 × 2
##     age education
##   <dbl>     <dbl>
## 1  42.0      13.2
## 2  38.0      12.6
## 3  16.4      10.3
## 4  33.8      16.2
## 5  33.4      14.0
## 6  14.3      11.4

# Open the spreadsheet-style viewer only when a person is driving the session.
# During knitting the code runs non-interactively, where View() has nowhere to
# display and can stop the render.
if (interactive()) {
  View(data)
}

# Per-column minimum, quartiles, median, mean and maximum.
summary(data)

##       age           education     
##  Min.   : 8.657   Min.   : 6.196  
##  1st Qu.:27.887   1st Qu.:12.024  
##  Median :35.798   Median :14.029  
##  Mean   :35.326   Mean   :13.827  
##  3rd Qu.:42.549   3rd Qu.:15.851  
##  Max.   :62.923   Max.   :19.727

# Scatter plot of education against age with a fitted regression line overlaid.
ggpubr::ggscatter(
  data = data, x = "age", y = "education",
  add = "reg.line",
  xlab = "Age", ylab = "Years of education"
)

# The relationship is [linear].

# The relationship is [positive].

# The relationship is [moderate].

# There [are no] outliers.
# Descriptive statistics for age: mean, standard deviation, then median.
mean(data[["age"]])

## [1] 35.32634

sd(data[["age"]])

## [1] 11.45344

median(data[["age"]])

## [1] 35.79811

# Descriptive statistics for education: mean, standard deviation, then median.
mean(data[["education"]])

## [1] 13.82705

sd(data[["education"]])

## [1] 2.595901

median(data[["education"]])

## [1] 14.02915

# Histogram of age. `breaks = 20` asks for roughly 20 bins; title, axis and
# label text are all drawn at scale 1.
hist(
  data$age,
  breaks = 20,
  main = "Age Distribution",
  xlab = "Age",
  col = "lightblue",
  border = "white",
  cex.main = 1,
  cex.axis = 1,
  cex.lab = 1
)

# Histogram of years of education. `breaks = 20` asks for roughly 20 bins.
hist(
  data$education,
  breaks = 20,
  main = "Education Distribution",
  xlab = "Years of Education",
  col = "lightgreen",
  border = "white"
)

# Variable 1: Age
# The first variable looks approximately normally distributed.
# The data is slightly positively skewed.
# The data does not have a perfect bell curve.

# Variable 2: Education
# The second variable, education, looks approximately normally distributed.
# The data is symmetrical.
# The data has a proper bell curve.

# Shapiro-Wilk normality test on age. The argument is written as `data$age`
# on purpose: the printed "data:" label is taken from this expression.
shapiro.test(data$age)

## 
##  Shapiro-Wilk normality test
## 
## data:  data$age
## W = 0.99194, p-value = 0.5581

# Shapiro-Wilk normality test on education. The argument is written as
# `data$education` on purpose: the printed "data:" label is taken from it.
shapiro.test(data$education)

## 
##  Shapiro-Wilk normality test
## 
## data:  data$education
## W = 0.9908, p-value = 0.4385

# Variable 1: Age
# The first variable did not depart significantly from normality (Shapiro-Wilk W = 0.99, p = .56), so it is treated as normally distributed.

# Variable 2: Education
# The second variable did not depart significantly from normality (Shapiro-Wilk W = 0.99, p = .44), so it is treated as normally distributed.

# Pearson correlation between age and education. As the output below shows,
# the test is two-sided (alternative: true correlation is not equal to 0) and
# reports a 95 percent confidence interval for r.
cor.test(data$age, data$education, method = "pearson")

## 
##  Pearson's product-moment correlation
## 
## data:  data$age and data$education
## t = 7.4066, df = 148, p-value = 9.113e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3924728 0.6279534
## sample estimates:
##       cor 
## 0.5200256

# A Pearson correlation was conducted to test the relationship between age (M = 35.33, SD = 11.45) and education (M = 13.83, SD = 2.60).
# There was a statistically significant relationship between the two variables, r(148) = .52, p < .001.
# The relationship was positive and moderate.
# As age increased, education tended to increase.