## Input nama Provinsi
PROVINSI <- c(
  "ACEH","SUMATERA UTARA","SUMATERA BARAT","RIAU","JAMBI","SUMATERA SELATAN",
  "BENGKULU","LAMPUNG","KEP. BANGKA BELITUNG","KEP. RIAU",
  "DKI JAKARTA","JAWA BARAT","JAWA TENGAH","DI YOGYAKARTA","JAWA TIMUR",
  "BANTEN","BALI","NUSA TENGGARA BARAT","NUSA TENGGARA TIMUR",
  "KALIMANTAN BARAT","KALIMANTAN TENGAH","KALIMANTAN SELATAN",
  "KALIMANTAN TIMUR","KALIMANTAN UTARA",
  "SULAWESI UTARA","SULAWESI TENGAH","SULAWESI SELATAN","SULAWESI TENGGARA",
  "GORONTALO","SULAWESI BARAT",
  "MALUKU","MALUKU UTARA","PAPUA BARAT","PAPUA"
)

# Variabel respon
Y <- c(
  72.8,72.71,73.26,73.52,72.14,70.9,72.16,70.45,72.24,76.46,
  81.65,73.12,72.79,80.64,72.75,73.32,76.44,69.46,65.9,
  68.63,71.63,71.84,77.44,71.83,73.81,70.28,72.82,72.23,
  69.81,66.92,70.22,69.47,65.89,61.37
)

# Variabel bebas
X1 <- c(
  10.46,6.83,8.68,7.42,7.46,7.08,8.02,6.92,3.37,4.95,
  2.71,5.62,5.39,4.76,5.77,4.57,3.93,13.3,12.5,
  7.21,5.38,6.76,4.81,6.19,6.27,7.62,6.23,8.14,
  6.1,13.04,11.8,9.28,12.29,5.85
)

X2 <- c(
  70.67,77.16,65.96,66.91,65.85,67.07,64.88,62.42,66.87,73.93,
  87.71,67.05,58.75,87.92,66.87,66.02,76.59,61,38.47,
  58.4,61.88,67.81,74,54.8,66.66,53.73,68.32,65.97,
  45.12,55.18,72.08,67.1,57.07,39.01
)

X3 <- c(
  38.55,40.93,34.74,45.98,40.02,36.72,32.46,28.49,50.37,63.68,
  63.12,45.39,39.84,46.62,36.14,52.04,46.57,24.64,24.76,
  41.23,48.5,42.7,55.74,51,40.85,32.13,36.45,37.61,
  35.03,22.75,36.7,34.37,39.72,15.89
)

X4 <- c(
  44.45,30.94,43.79,35.29,30.08,26.31,38.15,21.48,14.85,27.47,
  39.56,26.01,23.95,75.59,30.07,32.67,38.46,32.05,32.48,
  26.59,25.84,27.5,40.62,25.66,34.36,39.48,42.63,45.24,
  36.94,29.43,51.36,44.27,36.11,20.08
)

X5 <- c(
  85.72,92.83,94.21,94.95,93.63,92.77,94.03,93.98,96.11,96.52,
  97.37,95.61,97.44,98.74,95.34,94.49,96.69,91.58,77.3,
  89.49,91.32,96.28,97.57,95.31,91.69,86.96,94.81,92.82,
  93.06,90.41,78.51,77.48,76.29,33.06
)

# Membuat data frame
data <- data.frame(PROVINSI, Y, X1, X2, X3, X4, X5)

##  Eksplorasi Data
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'GGally' was built under R version 4.4.3
##    PROVINSI               Y               X1               X2       
##  Length:34          Min.   :61.37   Min.   : 2.710   Min.   :38.47  
##  Class :character   1st Qu.:70.23   1st Qu.: 5.447   1st Qu.:59.31  
##  Mode  :character   Median :72.19   Median : 6.795   Median :66.34  
##                     Mean   :71.97   Mean   : 7.256   Mean   :64.68  
##                     3rd Qu.:73.22   3rd Qu.: 8.110   3rd Qu.:68.19  
##                     Max.   :81.65   Max.   :13.300   Max.   :87.92  
##        X3              X4              X5       
##  Min.   :15.89   Min.   :14.85   Min.   :33.06  
##  1st Qu.:34.81   1st Qu.:26.81   1st Qu.:90.64  
##  Median :39.78   Median :32.58   Median :93.81  
##  Mean   :40.05   Mean   :34.40   Mean   :90.13  
##  3rd Qu.:46.42   3rd Qu.:39.54   3rd Qu.:95.54  
##  Max.   :63.68   Max.   :75.59   Max.   :98.74
# Mengatur layout 2x2 untuk menampilkan 4 scatterplot
par(mfrow=c(2,2))

# Y vs X1
plot(data$X1, data$Y,
     main="Scatterplot Y vs X1",
     xlab="Indeks Demokrasi Indonesia (X1)", 
     ylab="IPM (Y)",
     pch=19, col="pink")
abline(lm(Y ~ X1, data=data), col="lightblue", lwd=2)

# Y vs X2
plot(data$X2, data$Y,
     main="Scatterplot Y vs X2",
     xlab="APK Perguruan Tinggi (X2)", 
     ylab="IPM (Y)",
     pch=19, col="pink")
abline(lm(Y ~ X2, data=data), col="lightblue", lwd=2)

# Y vs X3
plot(data$X3, data$Y,
     main="Scatterplot Y vs X3",
     xlab="Tenaga Kerja Formal (X3)", 
     ylab="IPM (Y)",
     pch=19, col="pink")
abline(lm(Y ~ X3, data=data), col="lightblue", lwd=2)

# Y vs X4
plot(data$X4, data$Y,
     main="Scatterplot Y vs X4",
     xlab="Akses Layanan Sanitasi Dasar (X4)", 
     ylab="IPM (Y)",
     pch=19, col="pink")
abline(lm(Y ~ X4, data=data), col="lightblue", lwd=2)

# Matriks Korelasi Pearson (Y, X1, X2, X3, X4)
cor_matrix <- cor(data[, c("Y", "X1", "X2", "X3", "X4")])
cor_matrix
##             Y         X1         X2          X3         X4
## Y   1.0000000 -0.5976935  0.8573490  0.77470259 0.42634038
## X1 -0.5976935  1.0000000 -0.3913953 -0.63967858 0.13804593
## X2  0.8573490 -0.3913953  1.0000000  0.64405802 0.46471069
## X3  0.7747026 -0.6396786  0.6440580  1.00000000 0.06954047
## X4  0.4263404  0.1380459  0.4647107  0.06954047 1.00000000

Uji Asumsi Model Awal

# Membuat model regresi linier berganda
model <- lm(Y ~ X1 + X2 + X3 + X4, data = data)
model
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4, data = data)
## 
## Coefficients:
## (Intercept)           X1           X2           X3           X4  
##    56.79109     -0.36655      0.16434      0.11005      0.08134
# 1) Normalitas residual: Shapiro-Wilk dan QQ-plot
shapiro <- shapiro.test(residuals(model))
shapiro
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model)
## W = 0.94543, p-value = 0.0896
qqnorm(residuals(model)); qqline(residuals(model))

# 2) Homokedastisitas: Breusch-Pagan test
bptest_result <- lmtest::bptest(model)
bptest_result
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 9.3773, df = 4, p-value = 0.05233
# 3) Multikolinearitas: VIF
vif_values <- car::vif(model)
vif_values
##       X1       X2       X3       X4 
## 1.819741 2.485802 2.516918 1.547638
# 4) Autokorelasi: Durbin-Watson test
dw <- lmtest::dwtest(model)
dw
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 1.1269, p-value = 0.002074
## alternative hypothesis: true autocorrelation is greater than 0

Estimasi Model

# Summary model
summary_model <- summary(model)

# Prediksi dan plot Prediksi vs Aktual
pred <- predict(model)
pred_df <- data.frame(aktual = data$Y, prediksi = pred, resid = residuals(model))

library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.4.3
ggplot(pred_df, aes(x = aktual, y = prediksi)) +
  geom_point() +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  geom_text_repel(aes(label = seq_along(aktual)), size = 3) +
  labs(title = "Plot Prediksi vs Aktual", x = "Aktual (Y)", y = "Prediksi (Yhat)") +
  theme_minimal()

# Tabel koefisien dengan p-value
knitr::kable(broom::tidy(model), digits = 4, caption = "Koefisien Estimasi Model")
Koefisien Estimasi Model
term estimate std.error statistic p.value
(Intercept) 56.7911 2.2736 24.9782 0.0000
X1 -0.3665 0.1247 -2.9402 0.0064
X2 0.1643 0.0371 4.4328 0.0001
X3 0.1101 0.0383 2.8728 0.0075
X4 0.0813 0.0289 2.8120 0.0087

Pengujian Hipotesis

# Uji F (ANOVA)
anova_model <- anova(model)
anova_model
## Analysis of Variance Table
## 
## Response: Y
##           Df  Sum Sq Mean Sq  F value    Pr(>F)    
## X1         1 179.529 179.529  83.4344 4.940e-10 ***
## X2         1 230.645 230.645 107.1905 2.985e-11 ***
## X3         1  12.958  12.958   6.0220  0.020373 *  
## X4         1  17.015  17.015   7.9076  0.008739 ** 
## Residuals 29  62.400   2.152                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Alternatif: uji F melalui summary (F-statistic)
summary_model$fstatistic
##   value   numdf   dendf 
## 51.1386  4.0000 29.0000
# Uji parsial: t-test sudah tersedia pada summary(model) -> lihat koefisien
# Untuk uji kontribusi parsial variabel, dapat juga dilakukan drop1 (partial F)
partial_drop <- drop1(model, test = "F")
partial_drop
## Single term deletions
## 
## Model:
## Y ~ X1 + X2 + X3 + X4
##        Df Sum of Sq     RSS    AIC F value    Pr(>F)    
## <none>               62.400 30.645                      
## X1      1    18.601  81.002 37.516  8.6448 0.0063803 ** 
## X2      1    42.281 104.681 46.235 19.6497 0.0001222 ***
## X3      1    17.759  80.159 37.160  8.2532 0.0075330 ** 
## X4      1    17.015  79.415 36.843  7.9076 0.0087389 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Evaluasi Model

# Goodness of fit
r2 <- summary_model$r.squared
adjr2 <- summary_model$adj.r.squared
rmse <- sqrt(mean(residuals(model)^2))
cat(sprintf("R-squared: %.4f
Adjusted R-squared: %.4f
RMSE: %.4f
", r2, adjr2, rmse))
## R-squared: 0.8758
## Adjusted R-squared: 0.8587
## RMSE: 1.3547
# Plot regresi (diagnostik)
par(mfcol = c(2,2))
plot(model)

# Ringkasan evaluasi model
data.frame(Metric = c("R-squared","Adjusted R-squared","RMSE"),
           Value = c(r2, adjr2, rmse))
##               Metric     Value
## 1          R-squared 0.8758319
## 2 Adjusted R-squared 0.8587053
## 3               RMSE 1.3547330