## Homework 1: Polynomial Regression
# load data
# Work on a copy of the built-in mtcars data set.
cars_data <- mtcars
# inspect the data
# glimpse() is provided by dplyr, which this script never attaches, so call it
# through its namespace. Fall back to base str() when dplyr is not installed
# so the inspection step never aborts the script.
if (requireNamespace("dplyr", quietly = TRUE)) {
  dplyr::glimpse(cars_data)
} else {
  str(cars_data)
}
## Rows: 32
## Columns: 11
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16…
## $ hp <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180…
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,…
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18…
## $ vs <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,…
## $ am <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,…
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,…
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,…
# check for missing data
# Count the NA entries in each column; the result is a named numeric vector
# with one element per column of cars_data.
vapply(cars_data, function(column) sum(is.na(column)), numeric(1))
## mpg cyl disp hp drat wt qsec vs am gear carb
## 0 0 0 0 0 0 0 0 0 0 0
# fit the polynomial regression model
# Cubic polynomial of mpg in hp. I() makes `^` act as arithmetic
# exponentiation instead of a formula operator.
poly_model <- lm(
  formula = mpg ~ hp + I(hp^2) + I(hp^3),
  data = cars_data
)
# Coefficient table, residual quantiles and overall fit statistics.
summary(poly_model)
##
## Call:
## lm(formula = mpg ~ hp + I(hp^2) + I(hp^3), data = cars_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8605 -1.3972 -0.5736 1.6461 9.0738
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.422e+01 5.961e+00 7.419 4.43e-08 ***
## hp -2.945e-01 1.178e-01 -2.500 0.0185 *
## I(hp^2) 9.115e-04 6.863e-04 1.328 0.1949
## I(hp^3) -8.701e-07 1.204e-06 -0.722 0.4760
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.103 on 28 degrees of freedom
## Multiple R-squared: 0.7606, Adjusted R-squared: 0.7349
## F-statistic: 29.65 on 3 and 28 DF, p-value: 7.769e-09
# model coefficients
# Named vector of the estimated intercept and the three hp terms.
coefficients(poly_model)
## (Intercept) hp I(hp^2) I(hp^3)
## 4.422493e+01 -2.945289e-01 9.114683e-04 -8.701086e-07
# model R-squared value
# Pull the multiple R-squared out of the summary object by its exact name.
summary(poly_model)[["r.squared"]]
## [1] 0.7605735
# plot residuals
# Plot the residuals against the fitted values instead of the row index: the
# row order of the data carries no information about the fit, whereas a trend
# or funnel shape against the fitted values points to remaining curvature or
# non-constant variance.
plot(
  fitted(poly_model), residuals(poly_model),
  main = "Residuals", ylab = "Residuals", xlab = "Fitted values"
)
# Zero reference line: residuals should scatter evenly around it.
abline(h = 0, col = "red")
# visualize model
# ggplot2 is never attached elsewhere in this script, so attach it here before
# its functions are used.
library(ggplot2)
# Scatter plot of mpg against hp with a cubic least-squares curve overlaid.
# poly(x, 3) uses an orthogonal basis, but it spans the same cubic as
# hp + I(hp^2) + I(hp^3), so the drawn curve is the same fit as poly_model.
ggplot(cars_data, aes(x = hp, y = mpg)) +
  geom_point() +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, 3),
    se = FALSE,
    color = "blue"
  ) +
  labs(
    title = "Polynomial Regression",
    x = "Horsepower",
    y = "Miles Per Gallon (mpg)"
  )