library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.1
## v readr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(broom)
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 4.0.5
# Load the built-in iris flower measurements and take a first look at them:
# overall dimensions, the species labels, and the first few rows.
data(iris)
dim(iris)
## [1] 150 5
levels(iris[["Species"]])
## [1] "setosa" "versicolor" "virginica"
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa

# Restrict the data to the setosa species and keep only the two petal
# measurements (plus the species label) needed for the regression.
setosa <- iris %>%
  filter(Species == "setosa") %>%
  select(Petal.Width, Petal.Length, Species)
head(setosa)
## Petal.Width Petal.Length Species
## 1 0.2 1.4 setosa
## 2 0.2 1.4 setosa
## 3 0.2 1.3 setosa
## 4 0.2 1.5 setosa
## 5 0.2 1.4 setosa
## 6 0.4 1.7 setosa
# Simple linear regression: petal length as a function of petal width,
# fitted on the setosa subset only.
model <- lm(formula = Petal.Length ~ Petal.Width, data = setosa)

# Print the fitted coefficients (intercept and slope).
print(model)
##
## Call:
## lm(formula = Petal.Length ~ Petal.Width, data = setosa)
##
## Coefficients:
## (Intercept) Petal.Width
## 1.3276 0.5465

# Full summary: residual quantiles, coefficient standard errors and tests,
# residual standard error, R-squared and the overall F-test.
summary(object = model)
##
## Call:
## lm(formula = Petal.Length ~ Petal.Width, data = setosa)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.43686 -0.09151 -0.03686 0.09018 0.46314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.32756 0.05996 22.141 <2e-16 ***
## Petal.Width 0.54649 0.22439 2.435 0.0186 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1655 on 48 degrees of freedom
## Multiple R-squared: 0.11, Adjusted R-squared: 0.09144
## F-statistic: 5.931 on 1 and 48 DF, p-value: 0.01864
# Per-observation diagnostics from broom: the model variables alongside the
# fitted value, residual, leverage (.hat), leave-one-out sigma, Cook's
# distance and standardised residual for every row used in the fit.
model.diag.metrics <- augment(x = model)
head(model.diag.metrics, n = 6)
## # A tibble: 6 x 8
## Petal.Length Petal.Width .fitted .resid .hat .sigma .cooksd .std.resid
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1.4 0.2 1.44 -0.0369 0.0239 0.167 0.000622 -0.225
## 2 1.4 0.2 1.44 -0.0369 0.0239 0.167 0.000622 -0.225
## 3 1.3 0.2 1.44 -0.137 0.0239 0.166 0.00857 -0.837
## 4 1.5 0.2 1.44 0.0631 0.0239 0.167 0.00182 0.386
## 5 1.4 0.2 1.44 -0.0369 0.0239 0.167 0.000622 -0.225
## 6 1.7 0.4 1.55 0.154 0.0636 0.166 0.0313 0.960
# Residual plot: observed points, the least-squares line drawn by
# stat_smooth(), and a thin red vertical segment from each observation to its
# fitted value, so the length of each segment is that point's residual.
# NOTE: `size` is kept for the segment width because this was run on
# ggplot2 3.3.3; ggplot2 >= 3.4.0 prefers `linewidth` for line geoms.
ggplot(
  data = model.diag.metrics,
  mapping = aes(x = Petal.Width, y = Petal.Length)
) +
  geom_point() +
  stat_smooth(method = lm, se = FALSE) +
  geom_segment(
    mapping = aes(xend = Petal.Width, yend = .fitted),
    color = "red",
    size = 0.3
  )
## `geom_smooth()` using formula 'y ~ x'

# Draw ggfortify's default set of diagnostic plots for the fitted lm object.
# The deprecation warning recorded below mentions dplyr's `arrange_()`, which
# this script never calls itself.
autoplot(object = model)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
