library(plyr)
## Warning: package 'plyr' was built under R version 4.1.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
# SHOE DATA -----------
# Read the shoe-size survey workbook into `Shoe_data`.
# NOTE(review): this absolute path is machine-specific; consider a
# project-relative path so the script runs on other computers.
Shoe_data <- read_excel("C:\\Users\\User\\Documents\\Bachelor of Science in Computer Science A.Y 2021-2022 (First Semester)\\CS 102 (DATA SCIENCE 2-DATA PREPARATION)\\Shoe_data.xlsx")
# View() opens a data viewer window, which is only useful (and only reliably
# available) in an interactive session; skip it under Rscript or when knitting.
if (interactive()) {
  View(Shoe_data)
}
# Explore the relationship between shoe size and height of the respondents:
# scatter plot of height against shoe size with a loess smooth curve.
scatter.smooth(
  main = "Scatter plot",
  x = Shoe_data$Shoe_size,
  y = Shoe_data$Height
)

# Pearson correlation coefficient between height and shoe size.
cor(x = Shoe_data$Height, y = Shoe_data$Shoe_size, method = "pearson")
## [1] 0.7766089
# The correlation coefficient r is 0.7766089, so height and shoe size have a positive linear relationship: as shoe size increases, height tends to increase as well.
# Fit a simple linear regression of height on shoe size so that its p-value
# can be calculated and interpreted, then print the fitted coefficients.
linearmod_shoedata <- lm(formula = Height ~ Shoe_size, data = Shoe_data)
linearmod_shoedata
##
## Call:
## lm(formula = Height ~ Shoe_size, data = Shoe_data)
##
## Coefficients:
## (Intercept) Shoe_size
## 54.112 1.536
# Print the full model summary: residual quantiles, coefficient estimates with
# standard errors, t values and p-values, R-squared, and the overall F-test.
summary(linearmod_shoedata)
##
## Call:
## lm(formula = Height ~ Shoe_size, data = Shoe_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.1722 -1.5712 0.1325 1.8461 4.2549
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.1123 2.3524 23.003 < 2e-16 ***
## Shoe_size 1.5365 0.2444 6.286 1.18e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.604 on 26 degrees of freedom
## Multiple R-squared: 0.6031, Adjusted R-squared: 0.5879
## F-statistic: 39.51 on 1 and 26 DF, p-value: 1.183e-06
# The fitted linear regression model is Y(hat) = 54.11 + 1.54 * X, where X is shoe size and Y(hat) is the predicted height.
# The p-value is 1.183e-06 (0.000001183), so the linear regression model is statistically significant because the p-value is less than 0.05.
# Shoe size is significantly associated with height, so we can use the model to predict a person's height from their shoe size.