# Load the built-in mtcars data set and print per-column summary statistics.
data(mtcars)
# Summarise mtcars, the data set analysed in the rest of this file, rather
# than the unrelated `cars` data set.
# NOTE(review): the recorded output that follows (speed / dist columns) was
# produced by summary(cars) and is stale; re-run to refresh it.
summary(mtcars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
# Mean of every column in one vectorised call, instead of eleven separate
# mean(mtcars$<col>) calls. The result is a named numeric vector with one
# entry per column of mtcars.
colMeans(mtcars)
##        mpg        cyl       disp         hp       drat         wt       qsec
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750
##         vs         am       gear       carb
##   0.437500   0.406250   3.687500   2.812500
This shows the mean of each variable.
# Scatter plots of mpg against qsec and of mpg against wt, followed by a
# histogram of qsec.
ggplot(mtcars, aes(mpg, qsec)) +
  geom_point()
ggplot(mtcars, aes(mpg, wt)) +
  geom_point()
hist(x = mtcars$qsec)
I think that the correlation between mpg and wt is the most intriguing
because it shows that the more weight a car has, the lower its mpg, and it
is also the strongest of the correlations.
# Count missing values across every cell of mtcars.
# NOTE(review): the call that produced the recorded output was missing from
# this file; sum(is.na(mtcars)) is assumed from the output and the sentence
# that follows it. Confirm against the original analysis.
sum(is.na(mtcars))
## [1] 0
In this data set there are no missing values.
# Draw one boxplot per column of mtcars, in column order, to screen each
# variable for outliers. Each plot is titled with its column name so the
# eleven plots can be told apart.
for (column in names(mtcars)) {
  boxplot(mtcars[[column]], main = column)
}
There are four variables with outliers in this data set: hp, wt, qsec, and carb.
# Fit a linear model of mpg on every other column of mtcars; the `.` on the
# right-hand side of the formula stands for all remaining columns.
Model <- lm(mpg ~ ., data = mtcars)
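The three largest coefficients by absolute value are wt (about -3.72), am (about 2.52), and qsec (about 0.82). Each coefficient is the estimated change in mpg for a one-unit change in that predictor with the others held fixed, so coefficient size depends on the predictor's units and does not by itself measure strength of correlation.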
The linear regression assumptions considered here are a sufficiently large sample size and no multicollinearity. The assumptions for this data set are met.
# Report the full model's R-squared, then compute its in-sample mean squared
# error as the average squared gap between fitted and observed mpg.
model_summ <- summary(Model)
model_summ[["r.squared"]]
## [1] 0.8690158
lm_mse <- mean((fitted(Model) - mtcars[["mpg"]])^2)
print(lm_mse)
## [1] 4.609201
# Fit mpg on vs, am and their interaction (vs * am expands to
# vs + am + vs:am), then report that model's R-squared.
Model_test <- lm(mpg ~ vs * am, data = mtcars)
Model_test_summ <- summary(Model_test)
Model_test_summ[["r.squared"]]
## [1] 0.7002999
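This suggests that engine type (vs) and transmission (am) may depend on each other in their effect on mpg. The R-squared decreased from 0.869 for the full model to 0.700 for this model, which uses only these two predictors and their interaction.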
# Winsorize hp, wt, qsec and carb: values below the lower quantile or above
# the upper quantile of a column are replaced by that quantile, and the
# column's boxplot is redrawn to inspect the result. Both bounds are computed
# from the column before any value in it is changed.

# Cap every element of `values` to the closed interval [lower, upper].
clamp_to_bounds <- function(values, lower, upper) {
  stopifnot(lower <= upper)
  pmin(pmax(values, lower), upper)
}

# hp: cap at the 1st and 97th percentiles.
lower_bound_hp <- quantile(mtcars$hp, 0.01, na.rm = TRUE)
upper_bound_hp <- quantile(mtcars$hp, 0.97, na.rm = TRUE)
mtcars$hp <- clamp_to_bounds(mtcars$hp, lower_bound_hp, upper_bound_hp)
boxplot(mtcars$hp, main = "hp (winsorized)")

# wt: cap at the 1st and 95th percentiles.
# NOTE(review): wt uses 0.95 as its upper probability while the other three
# variables use 0.97; kept as in the original, confirm it is intentional.
lower_bound_wt <- quantile(mtcars$wt, 0.01, na.rm = TRUE)
upper_bound_wt <- quantile(mtcars$wt, 0.95, na.rm = TRUE)
mtcars$wt <- clamp_to_bounds(mtcars$wt, lower_bound_wt, upper_bound_wt)
boxplot(mtcars$wt, main = "wt (winsorized)")

# qsec: cap at the 1st and 97th percentiles.
lower_bound_qsec <- quantile(mtcars$qsec, 0.01, na.rm = TRUE)
upper_bound_qsec <- quantile(mtcars$qsec, 0.97, na.rm = TRUE)
mtcars$qsec <- clamp_to_bounds(mtcars$qsec, lower_bound_qsec, upper_bound_qsec)
boxplot(mtcars$qsec, main = "qsec (winsorized)")

# carb: cap at the 1st and 97th percentiles.
lower_bound_carb <- quantile(mtcars$carb, 0.01, na.rm = TRUE)
upper_bound_carb <- quantile(mtcars$carb, 0.97, na.rm = TRUE)
mtcars$carb <- clamp_to_bounds(mtcars$carb, lower_bound_carb, upper_bound_carb)
boxplot(mtcars$carb, main = "carb (winsorized)")