install.packages("psych", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmpx5Fu9Q/downloaded_packages
library(psych)
install.packages("readr", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmpx5Fu9Q/downloaded_packages
library(readr)
install.packages("dplyr", repos = "https://cloud.r-project.org")
## 
## The downloaded binary packages are in
##  /var/folders/5_/389qrkvs1sd7nkp792bslx5r0000gn/T//Rtmpx5Fu9Q/downloaded_packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the built-in Loblolly pine data set. data() returns only the *name* of
# the data set as a character string, so assigning its result would leave df4
# holding "Loblolly" rather than the data; copy the data frame explicitly.
data("Loblolly")
df4 <- Loblolly

# Plot each age observation as a vertical line (type = "h") against its index.
plot(x = Loblolly$age,
     type = "h",
     main = "Tree Age")

# Histogram of the tree heights.
hist(x = Loblolly$height, main = "Height of Tree")

# Look up the documentation for plot() (useful in interactive sessions).
help("plot")
## Help on topic 'plot' was found in the following packages:
## 
##   Package               Library
##   graphics              /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library
##   base                  /Library/Frameworks/R.framework/Resources/library
## 
## 
## Using the first match ...
# Scatter plot of tree height against age, with explicit title and axis labels.
with(Loblolly, plot(x = age,
                    y = height,
                    type = "p",
                    main = "Height VS Age",
                    xlab = "Age",
                    ylab = "Height"))

# Correlation between age and height (cor() defaults to Pearson).
with(Loblolly, cor(age, height))
## [1] 0.9899132

There is a strong positive correlation (r ≈ 0.99) between the height and the age of a tree.

# Simple linear regression with age as the response and height as the
# predictor. The formula is written with the `$` column references so the
# printed call and coefficient names match the summary shown below.
reg1 <- lm(formula = Loblolly$age ~ Loblolly$height)

# Coefficient table, residual summary and goodness-of-fit statistics.
summary(object = reg1)
## 
## Call:
## lm(formula = Loblolly$age ~ Loblolly$height)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5528 -0.7378  0.1421  0.6925  2.8966 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.757380   0.229203   3.304  0.00141 ** 
## Loblolly$height 0.378274   0.005979  63.272  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.126 on 82 degrees of freedom
## Multiple R-squared:  0.9799, Adjusted R-squared:  0.9797 
## F-statistic:  4003 on 1 and 82 DF,  p-value: < 2.2e-16
# Draw the diagnostic plots for the fitted model reg1; the "Assumptions"
# notes further down (Q-Q plot, Cook's distance) are read off these plots.
plot(reg1)

## Interpret

A 1-unit increase in height is associated with an increase of about 0.3783 in predicted age.

When height is 0, the predicted age is 0.757380 (the intercept).

The t value is 63.272 and the p-value is less than 2e-16, suggesting a statistically significant positive relationship between height and age.

The model explains about 98% of the variance in age (R-squared = 0.9799, adjusted R-squared = 0.9797).

Assumptions

The data shows a linear trend. Height and Age have a linear relationship.

As we can see on the normal Q-Q plot, the residuals fall close to the reference line, so they are approximately normally distributed.

The variability of the points is fairly constant.

No observation has an extreme Cook’s distance, so no single point is unduly influencing the fit.

Next, we fit the same data with the variables switched: height is now the response and age is the predictor.

# Same regression with the roles reversed: height is the response and age is
# the predictor. The `$` column references are kept so the printed call and
# coefficient names match the summary shown below.
reg2 <- lm(formula = Loblolly$height ~ Loblolly$age)

# Coefficient table, residual summary and goodness-of-fit statistics.
summary(object = reg2)
## 
## Call:
## lm(formula = Loblolly$height ~ Loblolly$age)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.0207 -2.1672 -0.4391  2.0539  6.8545 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.31240    0.62183  -2.111   0.0379 *  
## Loblolly$age  2.59052    0.04094  63.272   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.947 on 82 degrees of freedom
## Multiple R-squared:  0.9799, Adjusted R-squared:  0.9797 
## F-statistic:  4003 on 1 and 82 DF,  p-value: < 2.2e-16
# Draw the diagnostic plots for the fitted model reg2 (height regressed on age).
plot(reg2)