## Input nama Provinsi
PROVINSI <- c(
"ACEH","SUMATERA UTARA","SUMATERA BARAT","RIAU","JAMBI","SUMATERA SELATAN",
"BENGKULU","LAMPUNG","KEP. BANGKA BELITUNG","KEP. RIAU",
"DKI JAKARTA","JAWA BARAT","JAWA TENGAH","DI YOGYAKARTA","JAWA TIMUR",
"BANTEN","BALI","NUSA TENGGARA BARAT","NUSA TENGGARA TIMUR",
"KALIMANTAN BARAT","KALIMANTAN TENGAH","KALIMANTAN SELATAN",
"KALIMANTAN TIMUR","KALIMANTAN UTARA",
"SULAWESI UTARA","SULAWESI TENGAH","SULAWESI SELATAN","SULAWESI TENGGARA",
"GORONTALO","SULAWESI BARAT",
"MALUKU","MALUKU UTARA","PAPUA BARAT","PAPUA"
)
# Variabel respon
Y <- c(
72.8,72.71,73.26,73.52,72.14,70.9,72.16,70.45,72.24,76.46,
81.65,73.12,72.79,80.64,72.75,73.32,76.44,69.46,65.9,
68.63,71.63,71.84,77.44,71.83,73.81,70.28,72.82,72.23,
69.81,66.92,70.22,69.47,65.89,61.37
)
# Variabel bebas
X1 <- c(
10.46,6.83,8.68,7.42,7.46,7.08,8.02,6.92,3.37,4.95,
2.71,5.62,5.39,4.76,5.77,4.57,3.93,13.3,12.5,
7.21,5.38,6.76,4.81,6.19,6.27,7.62,6.23,8.14,
6.1,13.04,11.8,9.28,12.29,5.85
)
X2 <- c(
70.67,77.16,65.96,66.91,65.85,67.07,64.88,62.42,66.87,73.93,
87.71,67.05,58.75,87.92,66.87,66.02,76.59,61,38.47,
58.4,61.88,67.81,74,54.8,66.66,53.73,68.32,65.97,
45.12,55.18,72.08,67.1,57.07,39.01
)
X3 <- c(
38.55,40.93,34.74,45.98,40.02,36.72,32.46,28.49,50.37,63.68,
63.12,45.39,39.84,46.62,36.14,52.04,46.57,24.64,24.76,
41.23,48.5,42.7,55.74,51,40.85,32.13,36.45,37.61,
35.03,22.75,36.7,34.37,39.72,15.89
)
X4 <- c(
44.45,30.94,43.79,35.29,30.08,26.31,38.15,21.48,14.85,27.47,
39.56,26.01,23.95,75.59,30.07,32.67,38.46,32.05,32.48,
26.59,25.84,27.5,40.62,25.66,34.36,39.48,42.63,45.24,
36.94,29.43,51.36,44.27,36.11,20.08
)
X5 <- c(
85.72,92.83,94.21,94.95,93.63,92.77,94.03,93.98,96.11,96.52,
97.37,95.61,97.44,98.74,95.34,94.49,96.69,91.58,77.3,
89.49,91.32,96.28,97.57,95.31,91.69,86.96,94.81,92.82,
93.06,90.41,78.51,77.48,76.29,33.06
)
# Membuat data frame
data <- data.frame(PROVINSI, Y, X1, X2, X3, X4, X5)
## Eksplorasi Data
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'GGally' was built under R version 4.4.3
## PROVINSI Y X1 X2
## Length:34 Min. :61.37 Min. : 2.710 Min. :38.47
## Class :character 1st Qu.:70.23 1st Qu.: 5.447 1st Qu.:59.31
## Mode :character Median :72.19 Median : 6.795 Median :66.34
## Mean :71.97 Mean : 7.256 Mean :64.68
## 3rd Qu.:73.22 3rd Qu.: 8.110 3rd Qu.:68.19
## Max. :81.65 Max. :13.300 Max. :87.92
## X3 X4 X5
## Min. :15.89 Min. :14.85 Min. :33.06
## 1st Qu.:34.81 1st Qu.:26.81 1st Qu.:90.64
## Median :39.78 Median :32.58 Median :93.81
## Mean :40.05 Mean :34.40 Mean :90.13
## 3rd Qu.:46.42 3rd Qu.:39.54 3rd Qu.:95.54
## Max. :63.68 Max. :75.59 Max. :98.74
# Mengatur layout 2x2 untuk menampilkan 4 scatterplot
par(mfrow=c(2,2))
# Y vs X1
plot(data$X1, data$Y,
main="Scatterplot Y vs X1",
xlab="Indeks Demokrasi Indonesia (X1)",
ylab="IPM (Y)",
pch=19, col="pink")
abline(lm(Y ~ X1, data=data), col="lightblue", lwd=2)
# Y vs X2
plot(data$X2, data$Y,
main="Scatterplot Y vs X2",
xlab="APK Perguruan Tinggi (X2)",
ylab="IPM (Y)",
pch=19, col="pink")
abline(lm(Y ~ X2, data=data), col="lightblue", lwd=2)
# Y vs X3
plot(data$X3, data$Y,
main="Scatterplot Y vs X3",
xlab="Tenaga Kerja Formal (X3)",
ylab="IPM (Y)",
pch=19, col="pink")
abline(lm(Y ~ X3, data=data), col="lightblue", lwd=2)
# Y vs X4
plot(data$X4, data$Y,
main="Scatterplot Y vs X4",
xlab="Akses Layanan Sanitasi Dasar (X4)",
ylab="IPM (Y)",
pch=19, col="pink")
abline(lm(Y ~ X4, data=data), col="lightblue", lwd=2)

# Matriks Korelasi Pearson (Y, X1, X2, X3, X4)
cor_matrix <- cor(data[, c("Y", "X1", "X2", "X3", "X4")])
cor_matrix
## Y X1 X2 X3 X4
## Y 1.0000000 -0.5976935 0.8573490 0.77470259 0.42634038
## X1 -0.5976935 1.0000000 -0.3913953 -0.63967858 0.13804593
## X2 0.8573490 -0.3913953 1.0000000 0.64405802 0.46471069
## X3 0.7747026 -0.6396786 0.6440580 1.00000000 0.06954047
## X4 0.4263404 0.1380459 0.4647107 0.06954047 1.00000000
Uji Asumsi Model Awal
# Membuat model regresi linier berganda
model <- lm(Y ~ X1 + X2 + X3 + X4, data = data)
model
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4, data = data)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4
## 56.79109 -0.36655 0.16434 0.11005 0.08134
# 1) Normalitas residual: Shapiro-Wilk dan QQ-plot
shapiro <- shapiro.test(residuals(model))
shapiro
##
## Shapiro-Wilk normality test
##
## data: residuals(model)
## W = 0.94543, p-value = 0.0896
qqnorm(residuals(model)); qqline(residuals(model))

# 2) Homokedastisitas: Breusch-Pagan test
bptest_result <- lmtest::bptest(model)
bptest_result
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 9.3773, df = 4, p-value = 0.05233
# 3) Multikolinearitas: VIF
vif_values <- car::vif(model)
vif_values
## X1 X2 X3 X4
## 1.819741 2.485802 2.516918 1.547638
# 4) Autokorelasi: Durbin-Watson test
dw <- lmtest::dwtest(model)
dw
##
## Durbin-Watson test
##
## data: model
## DW = 1.1269, p-value = 0.002074
## alternative hypothesis: true autocorrelation is greater than 0
Estimasi Model
# Summary model
summary_model <- summary(model)
# Prediksi dan plot Prediksi vs Aktual
pred <- predict(model)
pred_df <- data.frame(aktual = data$Y, prediksi = pred, resid = residuals(model))
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.4.3
ggplot(pred_df, aes(x = aktual, y = prediksi)) +
geom_point() +
geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
geom_text_repel(aes(label = seq_along(aktual)), size = 3) +
labs(title = "Plot Prediksi vs Aktual", x = "Aktual (Y)", y = "Prediksi (Yhat)") +
theme_minimal()

# Tabel koefisien dengan p-value
knitr::kable(broom::tidy(model), digits = 4, caption = "Koefisien Estimasi Model")
Koefisien Estimasi Model
| (Intercept) |
56.7911 |
2.2736 |
24.9782 |
0.0000 |
| X1 |
-0.3665 |
0.1247 |
-2.9402 |
0.0064 |
| X2 |
0.1643 |
0.0371 |
4.4328 |
0.0001 |
| X3 |
0.1101 |
0.0383 |
2.8728 |
0.0075 |
| X4 |
0.0813 |
0.0289 |
2.8120 |
0.0087 |
Pengujian Hipotesis
# Uji F (ANOVA)
anova_model <- anova(model)
anova_model
## Analysis of Variance Table
##
## Response: Y
## Df Sum Sq Mean Sq F value Pr(>F)
## X1 1 179.529 179.529 83.4344 4.940e-10 ***
## X2 1 230.645 230.645 107.1905 2.985e-11 ***
## X3 1 12.958 12.958 6.0220 0.020373 *
## X4 1 17.015 17.015 7.9076 0.008739 **
## Residuals 29 62.400 2.152
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Alternatif: uji F melalui summary (F-statistic)
summary_model$fstatistic
## value numdf dendf
## 51.1386 4.0000 29.0000
# Uji parsial: t-test sudah tersedia pada summary(model) -> lihat koefisien
# Untuk uji kontribusi parsial variabel, dapat juga dilakukan drop1 (partial F)
partial_drop <- drop1(model, test = "F")
partial_drop
## Single term deletions
##
## Model:
## Y ~ X1 + X2 + X3 + X4
## Df Sum of Sq RSS AIC F value Pr(>F)
## <none> 62.400 30.645
## X1 1 18.601 81.002 37.516 8.6448 0.0063803 **
## X2 1 42.281 104.681 46.235 19.6497 0.0001222 ***
## X3 1 17.759 80.159 37.160 8.2532 0.0075330 **
## X4 1 17.015 79.415 36.843 7.9076 0.0087389 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Evaluasi Model
# Goodness of fit
r2 <- summary_model$r.squared
adjr2 <- summary_model$adj.r.squared
rmse <- sqrt(mean(residuals(model)^2))
cat(sprintf("R-squared: %.4f
Adjusted R-squared: %.4f
RMSE: %.4f
", r2, adjr2, rmse))
## R-squared: 0.8758
## Adjusted R-squared: 0.8587
## RMSE: 1.3547
# Plot regresi (diagnostik)
par(mfcol = c(2,2))
plot(model)

# Ringkasan evaluasi model
data.frame(Metric = c("R-squared","Adjusted R-squared","RMSE"),
Value = c(r2, adjr2, rmse))
## Metric Value
## 1 R-squared 0.8758319
## 2 Adjusted R-squared 0.8587053
## 3 RMSE 1.3547330