library(e1071)
# Load the built-in `cars` data frame (50 observations of `speed` and `dist`).
data(cars)
# Expose the two columns as plain vectors instead of calling attach().
# attach() alters the search path and leaves a detached copy of the data
# there; explicit assignments make it obvious where `speed` and `dist`
# come from while keeping the bare names available to the code below.
speed <- cars$speed
dist <- cars$dist
# Inspect the structure of the data frame.
str(cars)
## 'data.frame': 50 obs. of 2 variables:
## $ speed: num 4 4 7 7 8 9 10 10 10 11 ...
## $ dist : num 2 10 4 22 16 10 18 26 34 17 ...
# Per-column summary statistics (min, quartiles, mean, max).
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
# The cars dataset has only two variables - let’s explore them…
# Histograms of each variable, requesting a fine binning (breaks = 50).
with(cars, hist(speed, breaks = 50))

with(cars, hist(dist, breaks = 50))

# Normal Q-Q plot of each variable, with a reference line drawn in colour 2.
with(cars, {
  qqnorm(speed)
  qqline(speed, col = 2)
})

with(cars, {
  qqnorm(dist)
  qqline(dist, col = 2)
})

# Shapiro-Wilk normality test for `speed`.
# The recorded p-value (0.4576) gives no evidence against normality.
with(cars, shapiro.test(speed))
##
## Shapiro-Wilk normality test
##
## data: speed
## W = 0.9776, p-value = 0.4576
# Shapiro-Wilk normality test for `dist`.
# The recorded p-value (0.0391) is below 0.05, so normality is rejected
# at the 5% level.
with(cars, shapiro.test(dist))
##
## Shapiro-Wilk normality test
##
## data: dist
## W = 0.9514, p-value = 0.0391
# There seems to be a linear relation between the variables…
# Boxplots of both columns of the data frame.
boxplot(cars)

library(corrplot)
# Correlation matrix of the two columns; wrapping the assignment in
# parentheses stores it in `correlations` and prints it in one step.
(correlations <- cor(cars))
## speed dist
## speed 1.0000000 0.8068949
## dist 0.8068949 1.0000000
# Visualise the correlation matrix.
corrplot(correlations)

# And finally, we can add the fitted line to the original scatter plot.
# Scatter plot of speed against dist, with the line from `model` in red.
# NOTE(review): `model` is not defined in the visible part of this script;
# confirm it is fitted (presumably as speed ~ dist) before this point.
plot(speed ~ dist, data = cars)
abline(model, col = "red")
