# Load the built-in mtcars data set and preview its first six rows.
data("mtcars")
head(mtcars, n = 6L)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# Per-column descriptive statistics (min, quartiles, median, mean, max).
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
# Report the correlation of fuel economy (mpg) with horsepower (hp) and with
# weight (wt). Each message ends with an explicit newline so the two lines do
# not run together when the file is executed as a plain script, and sep = ""
# avoids the doubled space that cat()'s default separator adds after the label.
cat("korelasi mpg dan hp: ", cor(mtcars$mpg, mtcars$hp), "\n", sep = "")
## korelasi mpg dan hp: -0.7761684
cat("korelasi mpg dan wt: ", cor(mtcars$mpg, mtcars$wt), "\n", sep = "")
## korelasi mpg dan wt: -0.8676594
# Simple linear regression: fuel economy (mpg) explained by horsepower (hp).
# Evaluating the bare object prints the call and the fitted coefficients.
model1 <- lm(formula = mpg ~ hp, data = mtcars)
model1
##
## Call:
## lm(formula = mpg ~ hp, data = mtcars)
##
## Coefficients:
## (Intercept) hp
## 30.09886 -0.06823
# Full regression table for model1: residual quantiles, coefficient estimates
# with standard errors and t-tests, R-squared and the overall F-test.
summary(model1)
##
## Call:
## lm(formula = mpg ~ hp, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.7121 -2.1122 -0.8854 1.5819 8.2360
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.09886 1.63392 18.421 < 2e-16 ***
## hp -0.06823 0.01012 -6.742 1.79e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.863 on 30 degrees of freedom
## Multiple R-squared: 0.6024, Adjusted R-squared: 0.5892
## F-statistic: 45.46 on 1 and 30 DF, p-value: 1.788e-07
# Multiple linear regression: mpg explained by horsepower (hp) and weight (wt).
# Evaluating the bare object prints the call and the fitted coefficients.
model2 <- lm(formula = mpg ~ hp + wt, data = mtcars)
model2
##
## Call:
## lm(formula = mpg ~ hp + wt, data = mtcars)
##
## Coefficients:
## (Intercept) hp wt
## 37.22727 -0.03177 -3.87783
# Full regression table for model2: residual quantiles, coefficient estimates
# with standard errors and t-tests, R-squared and the overall F-test.
summary(model2)
##
## Call:
## lm(formula = mpg ~ hp + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.941 -1.600 -0.182 1.050 5.854
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.22727 1.59879 23.285 < 2e-16 ***
## hp -0.03177 0.00903 -3.519 0.00145 **
## wt -3.87783 0.63273 -6.129 1.12e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.593 on 29 degrees of freedom
## Multiple R-squared: 0.8268, Adjusted R-squared: 0.8148
## F-statistic: 69.21 on 2 and 29 DF, p-value: 9.109e-12
# Scatter plot of mpg against horsepower, with the model1 regression line
# drawn on top in blue.
with(mtcars, plot(
  x = hp,
  y = mpg,
  pch = 19,
  xlab = "Horsepower (hp)",
  ylab = "Miles per Gallon (mpg)",
  main = "Hubungan HP dengan MPG"
))
abline(reg = model1, lwd = 2, col = "blue")
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
# RESET test (lmtest) on model1: checks whether the linear specification
# mpg ~ hp is adequate.
resettest(model1)
##
## RESET test
##
## data: model1
## RESET = 9.2467, df1 = 2, df2 = 28, p-value = 0.0008255
# RESET test (lmtest) on model2: checks whether the linear specification
# mpg ~ hp + wt is adequate.
resettest(model2)
##
## RESET test
##
## data: model2
## RESET = 7.2384, df1 = 2, df2 = 27, p-value = 0.003041
# Residuals-vs-fitted diagnostic for the simple model (mpg ~ hp). The model is
# refitted here so this section runs on its own; the red line marks zero.
model1 <- lm(formula = mpg ~ hp, data = mtcars)
plot(
  x = fitted(model1),
  y = residuals(model1),
  pch = 19,
  xlab = "Fitted Values",
  ylab = "Residuals",
  main = "Uji Homoskedastisitas Model 1"
)
abline(h = 0, lwd = 2, col = "red")
# Residuals-vs-fitted diagnostic for the multiple regression (mpg ~ hp + wt).
# The model is refitted here so this section runs on its own.
model2 <- lm(mpg ~ hp + wt, data = mtcars)
# Both axes must come from model2: pairing model2's residuals with another
# model's fitted values would not be a valid diagnostic for this model.
plot(model2$fitted.values, model2$residuals,
     main = "Uji Homoskedastisitas Model 2",
     xlab = "Fitted Values",
     ylab = "Residuals",
     pch = 19)
# Horizontal reference line at zero residual.
abline(h = 0, col = "green", lwd = 2)
library(lmtest)
# Studentized Breusch-Pagan test (lmtest) for heteroskedasticity in model1.
bptest(model1)
##
## studentized Breusch-Pagan test
##
## data: model1
## BP = 0.049298, df = 1, p-value = 0.8243
# Studentized Breusch-Pagan test (lmtest) for heteroskedasticity in model2.
bptest(model2)
##
## studentized Breusch-Pagan test
##
## data: model2
## BP = 0.88072, df = 2, p-value = 0.6438
# Shapiro-Wilk normality test applied to the residuals of model1.
shapiro.test(residuals(model1))
##
## Shapiro-Wilk normality test
##
## data: residuals(model1)
## W = 0.92337, p-value = 0.02568
# Shapiro-Wilk normality test applied to the residuals of model2.
shapiro.test(residuals(model2))
##
## Shapiro-Wilk normality test
##
## data: residuals(model2)
## W = 0.92792, p-value = 0.03427
# Histograms of the residuals of both models, used as a visual normality
# check. Each title names its model so the two figures can be told apart,
# matching the "... Model 1" / "... Model 2" titles of the residual plots.
# The models are refitted here so this section runs on its own.
model1 <- lm(mpg ~ hp, data = mtcars)
hist(residuals(model1),
     main = "Histogram Residual Model 1",
     xlab = "Residuals",
     col = "lightblue",
     border = "black")
model2 <- lm(mpg ~ hp + wt, data = mtcars)
hist(residuals(model2),
     main = "Histogram Residual Model 2",
     xlab = "Residuals",
     col = "purple",
     border = "black")
# Normal Q-Q plot of each model's residuals, each followed by its reference
# line.
qqnorm(y = residuals(model1))
qqline(y = residuals(model1))
qqnorm(y = residuals(model2))
qqline(y = residuals(model2))
# Pairwise correlation matrix of hp, wt and disp.
cor(mtcars[c("hp", "wt", "disp")], method = "pearson")
## hp wt disp
## hp 1.0000000 0.6587479 0.7909486
## wt 0.6587479 1.0000000 0.8879799
## disp 0.7909486 0.8879799 1.0000000
# Model sederhana lebih mudah dipahami dan bebas dari multikolinearitas, tetapi daya jelasnya terbatas (R² = 0,60). Model berganda menjelaskan variasi mpg lebih baik (R² = 0,83; residual sedikit lebih mendekati normal), tetapi perlu hati-hati terhadap multikolinearitas antarprediktor (korelasi hp–wt = 0,66).
# Pilihan model tergantung tujuan:
# Kalau mau sederhana dan interpretasinya jelas, pakai model sederhana. Kalau mau prediksi lebih akurat, pakai model berganda, tetapi pastikan asumsinya (terutama tidak adanya multikolinearitas) terpenuhi.