This report investigates whether manual or automatic transmissions provide better fuel efficiency (measured in miles per gallon, MPG) using the mtcars dataset.
We conduct exploratory analysis, fit regression models, examine residuals, and quantify the MPG difference between transmission types while accounting for confounders such as weight and horsepower.
library(ggplot2)
library(dplyr)
# Load a fresh copy of the built-in mtcars data and recode the transmission
# indicator `am` as a labelled factor. The levels are spelled out so that the
# 0 -> "Automatic", 1 -> "Manual" mapping is explicit instead of depending on
# factor()'s default ordering of the sorted unique values. "Automatic" is the
# first level and therefore the reference category in the models below.
data(mtcars)
mtcars$am <- factor(mtcars$am, levels = c(0, 1),
                    labels = c("Automatic", "Manual"))
# Model 1: unadjusted comparison. With the two-level factor `am` as the only
# predictor, the intercept is the mean MPG of the reference level (Automatic)
# and the amManual coefficient is the difference in mean MPG between manual
# and automatic cars.
model1 <- lm(mpg ~ am, data = mtcars)
summary(model1)
##
## Call:
## lm(formula = mpg ~ am, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.3923 -3.0923 -0.2974 3.2439 9.5077
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.147 1.125 15.247 1.13e-15 ***
## amManual 7.245 1.764 4.106 0.000285 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.902 on 30 degrees of freedom
## Multiple R-squared: 0.3598, Adjusted R-squared: 0.3385
## F-statistic: 16.86 on 1 and 30 DF, p-value: 0.000285
# Model 2: add weight (`wt`) as a covariate, so the amManual coefficient is
# the transmission difference at a fixed weight.
model2 <- lm(mpg ~ am + wt, data = mtcars)
summary(model2)
##
## Call:
## lm(formula = mpg ~ am + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5295 -2.3619 -0.1317 1.4025 6.8782
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.32155 3.05464 12.218 5.84e-13 ***
## amManual -0.02362 1.54565 -0.015 0.988
## wt -5.35281 0.78824 -6.791 1.87e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.098 on 29 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7358
## F-statistic: 44.17 on 2 and 29 DF, p-value: 1.579e-09
# Model 3: adjust for both weight (`wt`) and horsepower (`hp`); the amManual
# coefficient is the transmission difference with both held fixed.
model3 <- lm(mpg ~ am + wt + hp, data = mtcars)
summary(model3)
##
## Call:
## lm(formula = mpg ~ am + wt + hp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4221 -1.7924 -0.3788 1.2249 5.5317
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.002875 2.642659 12.867 2.82e-13 ***
## amManual 2.083710 1.376420 1.514 0.141268
## wt -2.878575 0.904971 -3.181 0.003574 **
## hp -0.037479 0.009605 -3.902 0.000546 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared: 0.8399, Adjusted R-squared: 0.8227
## F-statistic: 48.96 on 3 and 28 DF, p-value: 2.908e-11
# Sequential F-tests on the nested fits: each row after the first tests
# whether the term added relative to the previous model (wt, then hp)
# significantly reduces the residual sum of squares.
anova(model1, model2, model3)
## Analysis of Variance Table
##
## Model 1: mpg ~ am
## Model 2: mpg ~ am + wt
## Model 3: mpg ~ am + wt + hp
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 30 720.90
## 2 29 278.32 1 442.58 68.734 5.071e-09 ***
## 3 28 180.29 1 98.03 15.224 0.0005464 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tabulate the adjusted R-squared of the three fitted models side by side.
# The list passed to vapply() is unnamed, so the resulting vector carries no
# names and the data frame keeps its default 1..3 row names.
data.frame(
  Model = c("Model 1", "Model 2", "Model 3"),
  Adj_R_Squared = vapply(
    list(model1, model2, model3),
    function(fit) summary(fit)$adj.r.squared,
    numeric(1)
  )
)
## Model Adj_R_Squared
## 1 Model 1 0.3384589
## 2 Model 2 0.7357889
## 3 Model 3 0.8227357
# Re-display the Model 3 summary; nothing is refitted here, this is the same
# fit reported earlier. Model 3 has the highest adjusted R-squared of the
# three models compared above.
summary(model3)
##
## Call:
## lm(formula = mpg ~ am + wt + hp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4221 -1.7924 -0.3788 1.2249 5.5317
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 34.002875 2.642659 12.867 2.82e-13 ***
## amManual 2.083710 1.376420 1.514 0.141268
## wt -2.878575 0.904971 -3.181 0.003574 **
## hp -0.037479 0.009605 -3.902 0.000546 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared: 0.8399, Adjusted R-squared: 0.8227
## F-statistic: 48.96 on 3 and 28 DF, p-value: 2.908e-11
# Confidence intervals for the Model 3 coefficients at confint()'s default
# level (the 2.5 % / 97.5 % bounds, i.e. 95%). An interval that contains zero
# means that coefficient is not distinguishable from zero at that level.
confint(model3)
## 2.5 % 97.5 %
## (Intercept) 28.58963286 39.41611738
## amManual -0.73575874 4.90317900
## wt -4.73232353 -1.02482730
## hp -0.05715454 -0.01780291
# Draw the lm diagnostic plots for Model 3 on a single 2x2 grid.
# par() returns the previous values of the parameters it changes, so they are
# captured and restored afterwards; otherwise the 2x2 layout would leak into
# every later base-graphics plot in the session.
old_par <- par(mfrow = c(2, 2))
plot(model3)
par(old_par)
# Residuals-vs-fitted scatter for Model 3, with a red horizontal reference
# line at zero.
resid_df <- data.frame(
  fitted = fitted(model3),
  resid = residuals(model3)
)
ggplot(data = resid_df, mapping = aes(x = fitted, y = resid)) +
  geom_point() +
  geom_hline(yintercept = 0, color = "red") +
  theme_minimal() +
  labs(
    title = "Residual Plot",
    x = "Fitted Values",
    y = "Residuals"
  )
# Pairwise correlations (cor()'s default method) among MPG, weight and
# horsepower, rounded to two decimals for display.
round(cor(mtcars[c("mpg", "wt", "hp")]), digits = 2)
## mpg wt hp
## mpg 1.00 -0.87 -0.78
## wt -0.87 1.00 0.66
## hp -0.78 0.66 1.00
# Scatterplot matrix of MPG, weight, horsepower and transmission.
# NOTE(review): `am` was converted to a factor earlier in this script, so
# pairs() presumably plots it via its integer level codes (1/2) rather than
# the original 0/1 values -- confirm this is the intended display.
pairs(mtcars[, c("mpg", "wt", "hp", "am")], main = "Pairwise Relationships")