library(plyr)
## Warning: package 'plyr' was built under R version 4.1.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
#SHOE DATA -----------
# Read the shoe data workbook into `Shoe_data`.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file location exists -- consider a project-relative
# path.
Shoe_data <- read_excel("C:\\Users\\User\\Documents\\Bachelor of Science in Computer Science A.Y 2021-2022 (First Semester)\\CS 102 (DATA SCIENCE 2-DATA PREPARATION)\\Shoe_data.xlsx")

# View() opens a spreadsheet-style data viewer; only call it in an interactive
# session so the script still runs under Rscript, knitr, or a headless R.
if (interactive()) {
  View(Shoe_data)
}

# Correlation between the respondents' shoe size and height.
# Scatter plot of height against shoe size with a smooth curve drawn through it.
scatter.smooth(
  x = Shoe_data$Shoe_size,
  y = Shoe_data$Height,
  main = "Scatter plot"
)

# Correlation coefficient of the two columns (cor() defaults to Pearson).
cor(x = Shoe_data$Height, y = Shoe_data$Shoe_size)
## [1] 0.7766089
#The value of r is 0.7766089, so height and shoe size have a positive linear relationship: as shoe size increases, height tends to increase as well


# Fit a simple linear regression with Height as the response and Shoe_size as
# the predictor; the p-value of this model is what gets interpreted.
linearmod_shoedata <- lm(formula = Height ~ Shoe_size, data = Shoe_data)

# Show the model call and the estimated coefficients.
linearmod_shoedata
## 
## Call:
## lm(formula = Height ~ Shoe_size, data = Shoe_data)
## 
## Coefficients:
## (Intercept)    Shoe_size  
##      54.112        1.536
# Print the full model summary: residual quantiles, the coefficient table with
# standard errors and p-values, R-squared, and the overall F-test.
summary(linearmod_shoedata)
## 
## Call:
## lm(formula = Height ~ Shoe_size, data = Shoe_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.1722 -1.5712  0.1325  1.8461  4.2549 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  54.1123     2.3524  23.003  < 2e-16 ***
## Shoe_size     1.5365     0.2444   6.286 1.18e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.604 on 26 degrees of freedom
## Multiple R-squared:  0.6031, Adjusted R-squared:  0.5879 
## F-statistic: 39.51 on 1 and 26 DF,  p-value: 1.183e-06
#Linear regression model is Y(HAT) = 54.11+1.54*X
#The p-value is 1.183e-06 (about 0.000001183), so the linear regression model is statistically significant, since the p-value is less than 0.05
#Shoe size is associated with height, so we can use the model to predict a person's height from their shoe size