# Bring the built-in `trees` data set into the workspace
data("trees")

# Preview the first six rows (columns: Girth, Height, Volume)
head(trees, n = 6L)
##   Girth Height Volume
## 1   8.3     70   10.3
## 2   8.6     65   10.3
## 3   8.8     63   10.2
## 4  10.5     72   16.4
## 5  10.7     81   18.8
## 6  10.8     83   19.7
# Scatter plot with Girth on the x axis and Volume on the y axis
plot(x = trees$Girth, y = trees$Volume)

# Fit a simple linear regression of Volume on Girth and print the fit
lm(formula = Volume ~ Girth, data = trees)
## 
## Call:
## lm(formula = Volume ~ Girth, data = trees)
## 
## Coefficients:
## (Intercept)        Girth  
##     -36.943        5.066
# Fit the same model again, this time keeping the fitted object in
# `result` so that later steps can inspect and reuse it.
# Left-assignment is used so the variable being created is visible
# at the start of the line.
result <- lm(Volume ~ Girth, data = trees)

# Print the model summary: residual quantiles, coefficient table,
# residual standard error, R-squared values and the overall F-test
summary(result)
## 
## Call:
## lm(formula = Volume ~ Girth, data = trees)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.065 -3.107  0.152  3.495  9.587 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -36.9435     3.3651  -10.98 7.62e-12 ***
## Girth         5.0659     0.2474   20.48  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.252 on 29 degrees of freedom
## Multiple R-squared:  0.9353, Adjusted R-squared:  0.9331 
## F-statistic: 419.4 on 1 and 29 DF,  p-value: < 2.2e-16
# Call line shows the same code line
# Residuals show the vertical distance between each
# actual data point & the regression line
# Assumption of linear regression model: 
# residuals are normally distributed

# Ideally the median of the residuals is close to 0 
# & min/max and 1Q/3Q are roughly symmetric about it

# Draw a histogram of the model residuals to eyeball their distribution
hist(x = result$residuals)

# Can see a nearly normal distribution.

# Next: Study the Coefficients box

# Intercept is where the line cuts the Y axis ie. when Girth =0
# This Intercept value c = -36.9435 ie. -ve Y

# The Girth Coefficient (m) is the slope of the line 
# ie. if Girth changes by 1 unit then how many units will Volume change
# In this case Slope is 5.0659

# Now we have the linear equation y = mx+c
# ie. Volume = 5.0659*Girth - 36.9435
# We can use equation to predict any Volume given Girth 

# Look at the Pr(>|t|) values for Intercept & Girth
# This is the p value ie. the probability of seeing a t value at least
# this extreme if the Null Hypothesis were true

# The Null Hypothesis for Intercept is Intercept = 0. 
# With p<<0.05 we can reject the Null ie. conclude the intercept is non zero

# The Null Hypothesis for Girth is slope m = 0. 
# With p<<0.05 we can reject the Null ie. accept there is a non zero slope
# Or we can say Girth is a significant influencer of Volume

# Next look at Residual Standard Error
# The smaller this number, the closer the fitted values are to the data
# And Multiple R Square value shows how much of the variance in Volume
# is explained by Girth
# Girth can explain 93.53% of the variance in Volume - that's substantial!!

# Last: look at the p value of the F Statistic, which is << 0.05
# This means we can reject the Null that there is no relation between Girth & Volume

# Redraw the scatter plot, then overlay the fitted regression line
plot(x = trees$Girth, y = trees$Volume)
abline(reg = result)

# Manual prediction from the rounded coefficients: 5.0659 * X - 36.9435

5.0659 * 11 - 36.9435
## [1] 18.7814
5.0659 * 12 - 36.9435
## [1] 23.8473
5.0659 * 13 - 36.9435
## [1] 28.9132
# predict() takes the new predictor values as a data frame; the column
# is named Girth, the same as the predictor in the model formula
ngirth <- data.frame(Girth = c(11, 12, 13))

# Hand the fitted model and the new data to predict()
predict(result, newdata = ngirth)
##        1        2        3 
## 18.78096 23.84682 28.91267
#See the new volumes predicted

#Exercise: Create a linear model for cars dataset