library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.6     v stringr 1.4.0
## v tidyr   1.1.2     v forcats 0.5.1
## v readr   1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(broom)
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.0.5
# Load the built-in iris data set and inspect its dimensions, the species
# levels, and the first few rows.
data("iris")
dim(iris)
## [1] 150   5
levels(iris[["Species"]])
## [1] "setosa"     "versicolor" "virginica"
head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Keep only the setosa observations and the petal columns used by the
# regression below (Species is retained so the subset stays self-describing).
setosa <- iris %>%
  filter(Species == "setosa") %>%
  select(Petal.Width, Petal.Length, Species)
head(setosa)
##   Petal.Width Petal.Length Species
## 1         0.2          1.4  setosa
## 2         0.2          1.4  setosa
## 3         0.2          1.3  setosa
## 4         0.2          1.5  setosa
## 5         0.2          1.4  setosa
## 6         0.4          1.7  setosa
# Simple linear regression: petal length explained by petal width, fitted on
# the setosa subset only.
model <- lm(formula = Petal.Length ~ Petal.Width, data = setosa)
# Show the call and the estimated coefficients.
print(model)
## 
## Call:
## lm(formula = Petal.Length ~ Petal.Width, data = setosa)
## 
## Coefficients:
## (Intercept)  Petal.Width  
##      1.3276       0.5465
# Full summary: residual quantiles, coefficient table, and fit statistics.
summary(model)
## 
## Call:
## lm(formula = Petal.Length ~ Petal.Width, data = setosa)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.43686 -0.09151 -0.03686  0.09018  0.46314 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.32756    0.05996  22.141   <2e-16 ***
## Petal.Width  0.54649    0.22439   2.435   0.0186 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1655 on 48 degrees of freedom
## Multiple R-squared:   0.11,  Adjusted R-squared:  0.09144 
## F-statistic: 5.931 on 1 and 48 DF,  p-value: 0.01864
# Attach per-observation diagnostics to the model data with broom: the output
# below shows fitted values, residuals, leverage (.hat), .sigma, Cook's
# distance, and standardized residuals alongside the two model variables.
model.diag.metrics <- broom::augment(model)
head(model.diag.metrics, n = 6L)
## # A tibble: 6 x 8
##   Petal.Length Petal.Width .fitted  .resid   .hat .sigma  .cooksd .std.resid
##          <dbl>       <dbl>   <dbl>   <dbl>  <dbl>  <dbl>    <dbl>      <dbl>
## 1          1.4         0.2    1.44 -0.0369 0.0239  0.167 0.000622     -0.225
## 2          1.4         0.2    1.44 -0.0369 0.0239  0.167 0.000622     -0.225
## 3          1.3         0.2    1.44 -0.137  0.0239  0.166 0.00857      -0.837
## 4          1.5         0.2    1.44  0.0631 0.0239  0.167 0.00182       0.386
## 5          1.4         0.2    1.44 -0.0369 0.0239  0.167 0.000622     -0.225
## 6          1.7         0.4    1.55  0.154  0.0636  0.166 0.0313        0.960
# Plot Petal.Length against Petal.Width for the augmented model data, overlay
# the least-squares line, and draw a red segment from each observation to its
# fitted value so the residuals are visible.
ggplot(model.diag.metrics, aes(x = Petal.Width, y = Petal.Length)) +
  geom_point() +
  # Spell out the smoothing formula: ggplot2 otherwise guesses `y ~ x` and
  # prints a "`geom_smooth()` using formula" message on every render.
  stat_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  # NOTE(review): `size` is kept for the ggplot2 3.3.3 session recorded in this
  # file; ggplot2 >= 3.4.0 expects `linewidth` for line geoms.
  geom_segment(
    aes(xend = Petal.Width, yend = .fitted),
    color = "red",
    size = 0.3
  )

# Draw the regression diagnostic plots that ggfortify's autoplot() method
# provides for the fitted lm object.
autoplot(object = model)
# NOTE(review): this script never calls arrange_() itself, so the deprecation
# warning recorded below presumably originates inside ggfortify - confirm
# against the installed ggfortify version.
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.