Motor Trend is a magazine about the automobile industry. It is interested in exploring the relationship between a set of variables and miles per gallon (MPG) (outcome), particularly:
“Is an automatic or manual transmission better for MPG?” and “What is the MPG difference between automatic and manual transmissions?” Using the mtcars data set from Motor Trend magazine, together with hypothesis testing and linear regression, we conclude that there is a statistically significant difference in MPG between automatic and manual transmission cars: manual cars average 24.39 MPG versus 17.15 MPG for automatic cars (Welch two-sample t-test, p = 0.0014).
To quantify the MPG difference between automatic and manual transmission cars, we fitted a linear regression model of MPG on weight (wt), acceleration (qsec) and transmission type (am). Holding weight and acceleration constant, manual transmission cars achieve about 2.94 MPG more than automatic transmission cars (p = 0.047), which is far smaller than the unadjusted difference of 7.25 MPG.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the built-in mtcars data set and print a compact overview of its
# columns and their types.
data(mtcars)
utils::str(object = mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
# Turn the transmission indicator into a labelled factor: the first level
# becomes "AT" and the second becomes "MT".
mtcars[["am"]] <- as.factor(mtcars[["am"]])
levels(mtcars[["am"]]) <- c("AT", "MT")

# Mean MPG for each transmission type.
aggregate(mpg ~ am, data = mtcars, FUN = mean)
## am mpg
## 1 AT 17.14737
## 2 MT 24.39231
# Split the cars by transmission type (expects `am` to already carry the
# "AT"/"MT" labels).
atData <- subset(mtcars, am == "AT")
mtData <- subset(mtcars, am == "MT")

# Welch two-sample t-test comparing mean MPG of the two groups.
t.test(atData$mpg, mtData$mpg)
##
## Welch Two Sample t-test
##
## data: atData$mpg and mtData$mpg
## t = -3.7671, df = 18.332, p-value = 0.001374
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.280194 -3.209684
## sample estimates:
## mean of x mean of y
## 17.14737 24.39231
# Histogram of MPG, one panel per transmission type.
# An explicit binwidth (2 MPG) is used instead of stat_bin()'s default of
# 30 bins: the default is far too fine for 32 observations and makes ggplot2
# emit a "Pick better value with `binwidth`" message.
ggplot(data = mtcars, aes(mpg)) +
  geom_histogram(binwidth = 2, color = "blue", fill = "pink") +
  facet_grid(. ~ am) +
  labs(
    x = "Miles per Gallon",
    y = "Frequency",
    title = "MPG Histogram for AT and MT cars"
  )
# Boxplot of MPG by transmission type. The statement used to appear twice in
# a row, which rendered the identical figure twice; it is drawn once here.
ggplot(data = mtcars, aes(am, mpg)) +
  geom_boxplot(color = "blue", fill = "pink") +
  labs(x = "Transmission", y = "MPG", title = "MPG: AT vs MT")
## Linear Models

### Model 1: Regress mpg against am
# Model 1: simple linear regression of MPG on transmission type only.
fit_1 <- lm(formula = mpg ~ am, data = mtcars)
summary(object = fit_1)
##
## Call:
## lm(formula = mpg ~ am, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.3923 -3.0923 -0.2974 3.2439 9.5077
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.147 1.125 15.247 1.13e-15 ***
## amMT 7.245 1.764 4.106 0.000285 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.902 on 30 degrees of freedom
## Multiple R-squared: 0.3598, Adjusted R-squared: 0.3385
## F-statistic: 16.86 on 1 and 30 DF, p-value: 0.000285
# Model 2: run stepwise selection with step(), starting from the regression
# of MPG on every other column. Progress output is silenced (trace = 0) and
# up to 10000 steps are allowed. The selected model is mpg ~ wt + qsec + am.
fit_2 <- step(
  object = lm(formula = mpg ~ ., data = mtcars),
  trace = 0,
  steps = 10000
)
summary(object = fit_2)
##
## Call:
## lm(formula = mpg ~ wt + qsec + am, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4811 -1.5555 -0.7257 1.4110 4.6610
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.6178 6.9596 1.382 0.177915
## wt -3.9165 0.7112 -5.507 6.95e-06 ***
## qsec 1.2259 0.2887 4.247 0.000216 ***
## amMT 2.9358 1.4109 2.081 0.046716 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.459 on 28 degrees of freedom
## Multiple R-squared: 0.8497, Adjusted R-squared: 0.8336
## F-statistic: 52.75 on 3 and 28 DF, p-value: 1.21e-11
# Extend the transmission-only model with weight and qsec, giving
# mpg ~ am + wt + qsec, then compare the two nested models with an F-test.
fit_step <- update(fit_1, . ~ . + wt + qsec)
anova(fit_1, fit_step)
## Analysis of Variance Table
##
## Model 1: mpg ~ am
## Model 2: mpg ~ am + wt + qsec
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 30 720.90
## 2 28 169.29 2 551.61 45.618 1.55e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Draw the diagnostic plots for fit_2 in a 2x2 grid.
# par() returns the previous settings; they are restored afterwards so the
# 2x2 layout does not leak into any plot drawn later on the same device.
old_par <- par(mfrow = c(2, 2))
plot(fit_2)
par(old_par)