## Homework 1: Polynomial Regression

Loading the Data

# Copy the built-in mtcars data set into a working data frame
cars_data <- datasets::mtcars

# Preview the structure: dimensions, column types, and leading values
glimpse(x = cars_data)
## Rows: 32
## Columns: 11
## $ mpg  <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl  <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ disp <dbl> 160.0, 160.0, 108.0, 258.0, 360.0, 225.0, 360.0, 146.7, 140.8, 16…
## $ hp   <dbl> 110, 110, 93, 110, 175, 105, 245, 62, 95, 123, 123, 180, 180, 180…
## $ drat <dbl> 3.90, 3.90, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92,…
## $ wt   <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
## $ qsec <dbl> 16.46, 17.02, 18.61, 19.44, 17.02, 20.22, 15.84, 20.00, 22.90, 18…
## $ vs   <dbl> 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,…
## $ am   <dbl> 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,…
## $ gear <dbl> 4, 4, 4, 3, 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3,…
## $ carb <dbl> 4, 4, 1, 1, 2, 1, 4, 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2,…

Data Preprocessing (2 points)

# Count NA entries per column; a zero for every column means that no
# rows need to be dropped or imputed before modelling
na_flags <- is.na(cars_data)
colSums(na_flags)
##  mpg  cyl disp   hp drat   wt qsec   vs   am gear carb 
##    0    0    0    0    0    0    0    0    0    0    0

Building the Polynomial Regression Model (2 points)

# Fit a cubic (degree-3) polynomial regression of mpg on hp.
# I() protects `^` so that it is evaluated arithmetically (hp squared and
# hp cubed) instead of being read as a formula operator; the terms are raw,
# non-orthogonal powers of hp.
poly_model <- lm(mpg ~ hp + I(hp^2) + I(hp^3), data = cars_data)
# Print the residual quantiles, coefficient table, R-squared values, and
# the overall F-test for the fitted model
summary(poly_model)
## 
## Call:
## lm(formula = mpg ~ hp + I(hp^2) + I(hp^3), data = cars_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8605 -1.3972 -0.5736  1.6461  9.0738 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.422e+01  5.961e+00   7.419 4.43e-08 ***
## hp          -2.945e-01  1.178e-01  -2.500   0.0185 *  
## I(hp^2)      9.115e-04  6.863e-04   1.328   0.1949    
## I(hp^3)     -8.701e-07  1.204e-06  -0.722   0.4760    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.103 on 28 degrees of freedom
## Multiple R-squared:  0.7606, Adjusted R-squared:  0.7349 
## F-statistic: 29.65 on 3 and 28 DF,  p-value: 7.769e-09

Model Evaluation (3 points)

# Extract the estimated intercept and the three hp power terms
coefficients(poly_model)
##   (Intercept)            hp       I(hp^2)       I(hp^3) 
##  4.422493e+01 -2.945289e-01  9.114683e-04 -8.701086e-07
# Multiple R-squared: share of the variance in mpg explained by the model
model_summary <- summary(poly_model)
model_summary[["r.squared"]]
## [1] 0.7605735
# Plot the residuals in observation order, with a red reference line at
# zero to make systematic over- or under-prediction easy to spot
plot(
  residuals(poly_model),
  main = "Residuals",
  ylab = "Residuals",
  xlab = "Index"
)
abline(h = 0, col = "red")

Visualization (3 points)

# Scatter plot of mpg against hp with the fitted cubic curve overlaid
# (no confidence band)
ggplot(data = cars_data, mapping = aes(x = hp, y = mpg)) +
  geom_point() +
  geom_smooth(
    method = "lm",
    formula = y ~ poly(x, 3),
    se = FALSE,
    color = "blue"
  ) +
  ggtitle("Polynomial Regression") +
  xlab("Horsepower") +
  ylab("Miles Per Gallon (mpg)")