#load the library and import the data set 

##-------------- Shoe Size and Height ---------------##

library(readxl)
# Read Shoe.xlsx into `shoe`, then print it to preview the imported rows.
shoe <- read_excel(path = "C:\\Users\\user\\Downloads\\Shoe.xlsx")
shoe
## # A tibble: 28 x 3
##    shoe_size height gender
##        <dbl>  <dbl> <chr> 
##  1       6.5   66   F     
##  2       9     68   F     
##  3       8.5   64.5 F     
##  4       8.5   65   F     
##  5      10.5   70   M     
##  6       7     64   F     
##  7       9.5   70   F     
##  8       9     71   F     
##  9      13     72   M     
## 10       7.5   64   F     
## # ... with 18 more rows
## Find the correlation between shoe size and height of the respondents. 
## Use 0.05 level of significance.

# Plot height against shoe size; scatter.smooth() also overlays a loess curve.
scatter.smooth(
  x = shoe$shoe_size,
  y = shoe$height,
  main = "Scatter Plot"
)

# Correlation coefficient (cor() default method) between height and shoe size.
cor(x = shoe$height, y = shoe$shoe_size)
## [1] 0.7766089
## Based on the value of r, which is 0.7766089, height and shoe size have a
# positive relationship. This means that as shoe size increases,
# height tends to increase.

# Calculate the p-values. Interpret  the result. 

# Fit a simple linear regression of height on shoe size and print the fit.
linearMod_shoe_size <- lm(formula = height ~ shoe_size, data = shoe)
linearMod_shoe_size
## 
## Call:
## lm(formula = height ~ shoe_size, data = shoe)
## 
## Coefficients:
## (Intercept)    shoe_size  
##      54.112        1.536

# Full model summary: coefficient table with standard errors, t values and
# p-values, plus R-squared and the overall F-test.
summary(object = linearMod_shoe_size)
## 
## Call:
## lm(formula = height ~ shoe_size, data = shoe)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.1722 -1.5712  0.1325  1.8461  4.2549 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  54.1123     2.3524  23.003  < 2e-16 ***
## shoe_size     1.5365     0.2444   6.286 1.18e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.604 on 26 degrees of freedom
## Multiple R-squared:  0.6031, Adjusted R-squared:  0.5879 
## F-statistic: 39.51 on 1 and 26 DF,  p-value: 1.183e-06
# Linear regression model: Y(hat) = 54.11 + 1.54 * X

## Based on the p-value, which is 1.183e-06 (0.000001183), the linear regression
# model is statistically significant since the p-value is less than 0.05. Also,
# we can conclude that shoe size has a linear relationship with height.
# Therefore, we can use the model to predict a person's height from shoe size.

##-------------- Age and Price of Orions ---------------##

#load the library and import the data set 


# Read Orions.xlsx into `orions`, then print it to preview the imported rows.
orions <- read_excel(path = "C:\\Users\\user\\Downloads\\Orions.xlsx")
orions
## # A tibble: 11 x 2
##      age price
##    <dbl> <dbl>
##  1     5    85
##  2     4   103
##  3     6    70
##  4     5    82
##  5     5    89
##  6     5    98
##  7     6    66
##  8     6    95
##  9     2   169
## 10     7    70
## 11     7    48
## Set up scatter plot

# Plot price against age; scatter.smooth() also overlays a loess curve.
# NOTE(review): the simpleLoess warnings recorded below are presumably caused
# by age taking only a few distinct values -- confirm before relying on the
# smoothed curve.
scatter.smooth(
  x = orions$age,
  y = orions$price,
  main = "Scatter Plot"
)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at 5
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at 5
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at 5
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at 5
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## pseudoinverse used at 5
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## neighborhood radius 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric = FALSE, :
## There are other near singularities as well. 1
# Correlation coefficient (cor() default method) between age and price.
cor(x = orions$age, y = orions$price)
## [1] -0.9237821
##a. Determine the regression equation for the data.

# Regress price on age; printing the fit shows the intercept and slope.
linearMod_price <- lm(formula = price ~ age, data = orions)
linearMod_price
## 
## Call:
## lm(formula = price ~ age, data = orions)
## 
## Coefficients:
## (Intercept)          age  
##      195.47       -20.26
## The regression equation Y(hat) = 195.47 - 20.26 * X

##b. Graph the regression equation and the data points.

# Draw the data points and overlay the fitted least-squares line taken from
# linearMod_price (price ~ age). plot() + abline() is used instead of
# scatter.smooth() because scatter.smooth() draws a loess curve, not the
# regression equation that part (b) asks for. No loess fit is run here, so
# the simpleLoess warnings are no longer produced.
plot(
  x = orions$age,
  y = orions$price,
  main = "Scatter Plot"
)
abline(linearMod_price)

##c. Describe the apparent relationship between age and price of Orions

## Based on the graph, the age and price of Orions have a negative relationship.
## This means that as age increases, the price of Orions decreases.

##d. Interpret the slope of the regression line in terms of prices for Orions.

## Since the slope is -20.26, this implies that when the age increases by 1 year,
## the predicted price decreases by 20.26.

##e. Use the regression equation to predict the price of a 3-year old Orion and 4-year-old Orion.

# Evaluate the fitted equation Y(hat) = 195.47 - 20.26 * X at X = 3.
price_3_year_old <- 195.47 - 20.26 * 3
price_3_year_old
## [1] 134.69
# Therefore, the price of a 3-year old orion is 134.69

# Evaluate the fitted equation Y(hat) = 195.47 - 20.26 * X at X = 4.
price_4_year_old <- 195.47 - 20.26 * 4
price_4_year_old
## [1] 114.43
# Therefore, the price of a 4-year old orion is 114.43