Executive Summary

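This report investigates whether manual or automatic transmissions provide better fuel efficiency (measured in miles per gallon, MPG) using the mtcars dataset.
We conduct exploratory analysis, fit regression models, examine residuals, and quantify the MPG difference between transmission types while accounting for confounders such as weight and horsepower.
Key finding: manual cars average about 7.2 MPG more than automatics in the unadjusted comparison, but after adjusting for weight and horsepower the estimated difference falls to about 2.1 MPG, and its 95% confidence interval (-0.74 to 4.90) includes zero, so the adjusted difference is not statistically significant at the 5% level.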


1. Exploratory Data Analysis

library(ggplot2)
library(dplyr)

# Load a fresh copy of the built-in mtcars data so the recoding below always
# starts from the original numeric 0/1 transmission codes.
data(mtcars)

# Recode transmission as a labelled factor. The levels are spelled out so the
# 0 -> "Automatic", 1 -> "Manual" mapping is explicit instead of depending on
# the sort order of whatever values happen to be present. "Automatic" is the
# first level and therefore the reference category in the models.
mtcars$am <- factor(
  mtcars$am,
  levels = c(0, 1),
  labels = c("Automatic", "Manual")
)

# Model 1: unadjusted comparison of MPG between transmission types.
model1 <- lm(mpg ~ am, data = mtcars)
summary(model1)
## 
## Call:
## lm(formula = mpg ~ am, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.3923 -3.0923 -0.2974  3.2439  9.5077 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   17.147      1.125  15.247 1.13e-15 ***
## amManual       7.245      1.764   4.106 0.000285 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.902 on 30 degrees of freedom
## Multiple R-squared:  0.3598, Adjusted R-squared:  0.3385 
## F-statistic: 16.86 on 1 and 30 DF,  p-value: 0.000285
# Model 2: transmission type plus weight (wt) as a covariate, to see how the
# transmission coefficient changes once weight is accounted for.
model2 <- lm(mpg ~ am + wt, data = mtcars)
summary(model2)
## 
## Call:
## lm(formula = mpg ~ am + wt, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5295 -2.3619 -0.1317  1.4025  6.8782 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 37.32155    3.05464  12.218 5.84e-13 ***
## amManual    -0.02362    1.54565  -0.015    0.988    
## wt          -5.35281    0.78824  -6.791 1.87e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.098 on 29 degrees of freedom
## Multiple R-squared:  0.7528, Adjusted R-squared:  0.7358 
## F-statistic: 44.17 on 2 and 29 DF,  p-value: 1.579e-09
# Model 3: transmission type adjusted for both weight (wt) and horsepower (hp).
model3 <- lm(mpg ~ am + wt + hp, data = mtcars)
summary(model3)
## 
## Call:
## lm(formula = mpg ~ am + wt + hp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4221 -1.7924 -0.3788  1.2249  5.5317 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.002875   2.642659  12.867 2.82e-13 ***
## amManual     2.083710   1.376420   1.514 0.141268    
## wt          -2.878575   0.904971  -3.181 0.003574 ** 
## hp          -0.037479   0.009605  -3.902 0.000546 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared:  0.8399, Adjusted R-squared:  0.8227 
## F-statistic: 48.96 on 3 and 28 DF,  p-value: 2.908e-11
# Sequential F-tests across the three nested models: each row tests whether
# the term added relative to the previous model reduces the residual sum of
# squares significantly.
anova(model1, model2, model3)
## Analysis of Variance Table
## 
## Model 1: mpg ~ am
## Model 2: mpg ~ am + wt
## Model 3: mpg ~ am + wt + hp
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     30 720.90                                  
## 2     29 278.32  1    442.58 68.734 5.071e-09 ***
## 3     28 180.29  1     98.03 15.224 0.0005464 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Side-by-side adjusted R-squared for the three fitted models. The list is
# deliberately unnamed so the extracted vector carries no names and the data
# frame keeps its default 1..3 row names.
data.frame(
  Model = paste("Model", 1:3),
  Adj_R_Squared = vapply(
    list(model1, model2, model3),
    function(fit) summary(fit)$adj.r.squared,
    numeric(1)
  )
)
##     Model Adj_R_Squared
## 1 Model 1     0.3384589
## 2 Model 2     0.7357889
## 3 Model 3     0.8227357
# Coefficient table for model3, which has the highest adjusted R-squared of
# the three models; this repeats the summary printed when the model was fitted.
summary(model3)
## 
## Call:
## lm(formula = mpg ~ am + wt + hp, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4221 -1.7924 -0.3788  1.2249  5.5317 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.002875   2.642659  12.867 2.82e-13 ***
## amManual     2.083710   1.376420   1.514 0.141268    
## wt          -2.878575   0.904971  -3.181 0.003574 ** 
## hp          -0.037479   0.009605  -3.902 0.000546 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.538 on 28 degrees of freedom
## Multiple R-squared:  0.8399, Adjusted R-squared:  0.8227 
## F-statistic: 48.96 on 3 and 28 DF,  p-value: 2.908e-11
# 95% confidence intervals for every model3 coefficient (0.95 is confint's
# default level, written out here for clarity).
confint(model3, level = 0.95)
##                   2.5 %      97.5 %
## (Intercept) 28.58963286 39.41611738
## amManual    -0.73575874  4.90317900
## wt          -4.73232353 -1.02482730
## hp          -0.05715454 -0.01780291
# Standard lm diagnostic plots for model3, drawn together in a 2 x 2 grid.
# par() returns the previous settings, which are restored afterwards so the
# grid layout does not leak into any base-graphics plot drawn later.
old_par <- par(mfrow = c(2, 2))
plot(model3)
par(old_par)

# Pair each fitted value from model3 with its residual for plotting.
resid_df <- data.frame(
  fitted = fitted(model3),
  resid = resid(model3)
)

# Residuals against fitted values, with a red horizontal reference line at 0.
ggplot(data = resid_df, mapping = aes(x = fitted, y = resid)) +
  geom_point() +
  geom_hline(yintercept = 0, colour = "red") +
  ggtitle("Residual Plot") +
  xlab("Fitted Values") +
  ylab("Residuals") +
  theme_minimal()

# Pairwise correlations (cor's default method) between MPG, weight and
# horsepower, rounded to two decimals.
round(cor(subset(mtcars, select = c(mpg, wt, hp))), digits = 2)
##       mpg    wt    hp
## mpg  1.00 -0.87 -0.78
## wt  -0.87  1.00  0.66
## hp  -0.78  0.66  1.00
# Scatterplot matrix of the outcome and the three predictors used in model3.
pairs(
  mtcars[c("mpg", "wt", "hp", "am")],
  main = "Pairwise Relationships"
)