Data yang digunakan
library(leaps)
## Warning: package 'leaps' was built under R version 4.3.3
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 4.3.2
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.3
data <- read_excel("D:/Download/Data Tes Skala Besar.xlsx")
data
## # A tibble: 19 × 22
## `Kabupaten/_Kota` Peranan_Ekonomi_Daerah Pertumbuhan_Ekonomi PDRB TPT
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Kab. Kep. Mentawai 1.88 3.97 6246. 1.44
## 2 Kab. Pesisir Selatan 5.77 3.84 19182. 5.06
## 3 Kab. Solok 5.54 3.91 18392. 4.91
## 4 Kab. Sijunjung 3.76 3.89 12489. 4.73
## 5 Kab. Tanah datar 5.27 3.85 17496. 5.3
## 6 Kab. Padang Pariaman 8.07 4.57 26809. 6.59
## 7 Kab. Agam 8.42 4.12 27976. 4.73
## 8 Kab. Lima Puluh Kota 6.28 4.03 20851. 3.68
## 9 Kab. Pasaman 3.66 3.95 12165. 5.25
## 10 Kab. Solok Selatan 2.28 4.09 7588. 2.3
## 11 Kab. Dharmasraya 4.31 3.86 14328. 6.02
## 12 Kab. Pasaman Barat 6.36 4.08 21116. 6.34
## 13 Kota Padang 25.3 4.65 84183. 9.88
## 14 Kota Solok 1.67 4.55 5561. 3.62
## 15 Kota Sawahlunto 1.52 4.02 5058 5.55
## 16 Kota Padang Panjang 1.43 4.52 4757. 4.94
## 17 Kota Bukittinggi 3.54 4.52 11747. 4.72
## 18 Kota Payakumbuh 2.84 4.53 9421 4.87
## 19 Kota Pariaman 2.05 4.51 6802. 5.32
## # ℹ 17 more variables: Laju_Pertumbuhan_Penduduk <dbl>,
## # Kepadatan_Penduduk <dbl>, TPAK <dbl>, Rata_Rata_Gaji_Pekerja_Formal <dbl>,
## # Rata_Rata_Gaji_Pekerja_Informal <dbl>,
## # Indeks_Pembangunan_Literasi_Masyarakat <dbl>, IPM <dbl>,
## # Persentase_Penduduk_Miskin <dbl>,
## # Persentase_Penduduk_Tingkat_Pendidikan_Perguruan_Tinggi <dbl>, IKP <dbl>,
## # Produksi_Padi <dbl>, Produksi_Kelapa_Sawit <dbl>, Produksi_Kelapa <dbl>, …
Deskripsi Data
cat("Dimensi Data:", dim(data),"\n")
## Dimensi Data: 19 22
cat("Rasio p/n: ", (ncol(data)-2)/nrow(data),"\n")
## Rasio p/n: 1.052632
Statistika Deskriptif
summary(data)
## Kabupaten/_Kota Peranan_Ekonomi_Daerah Pertumbuhan_Ekonomi PDRB
## Length:19 Min. : 1.430 Min. :3.840 Min. : 4757
## Class :character 1st Qu.: 2.165 1st Qu.:3.930 1st Qu.: 7195
## Mode :character Median : 3.760 Median :4.080 Median :12489
## Mean : 5.263 Mean :4.182 Mean :17482
## 3rd Qu.: 6.025 3rd Qu.:4.520 3rd Qu.:20016
## Max. :25.340 Max. :4.650 Max. :84183
## TPT Laju_Pertumbuhan_Penduduk Kepadatan_Penduduk TPAK
## Min. :1.440 Min. :0.870 Min. : 15.50 Min. :65.00
## 1st Qu.:4.725 1st Qu.:1.285 1st Qu.: 85.84 1st Qu.:67.75
## Median :4.940 Median :1.460 Median : 251.23 Median :70.12
## Mean :5.013 Mean :1.451 Mean : 833.80 Mean :70.69
## 3rd Qu.:5.435 3rd Qu.:1.560 3rd Qu.:1363.70 3rd Qu.:72.84
## Max. :9.880 Max. :2.120 Max. :5171.68 Max. :80.07
## Rata_Rata_Gaji_Pekerja_Formal Rata_Rata_Gaji_Pekerja_Informal
## Min. :1950304 Min. : 884565
## 1st Qu.:2326228 1st Qu.:1581916
## Median :2724638 Median :1715280
## Mean :2692917 Mean :1723315
## 3rd Qu.:2953680 3rd Qu.:1918193
## Max. :3306210 Max. :2304160
## Indeks_Pembangunan_Literasi_Masyarakat IPM
## Min. :45.67 Min. :63.99
## 1st Qu.:52.70 1st Qu.:71.08
## Median :70.99 Median :74.06
## Mean :67.64 Mean :74.62
## 3rd Qu.:82.78 3rd Qu.:79.44
## Max. :89.61 Max. :83.99
## Persentase_Penduduk_Miskin
## Min. : 2.330
## 1st Qu.: 4.270
## Median : 5.780
## Mean : 5.931
## 3rd Qu.: 6.875
## Max. :13.890
## Persentase_Penduduk_Tingkat_Pendidikan_Perguruan_Tinggi IKP
## Min. : 7.350 Min. :53.86
## 1st Qu.: 9.725 1st Qu.:82.57
## Median :11.770 Median :85.08
## Mean :13.472 Mean :84.32
## 3rd Qu.:17.640 3rd Qu.:88.19
## Max. :24.670 Max. :92.90
## Produksi_Padi Produksi_Kelapa_Sawit Produksi_Kelapa Produksi_Karet
## Min. : 0.4699 Min. : 0.0 Min. : 2.22 Min. : 0.0
## 1st Qu.: 13.9865 1st Qu.: 15.9 1st Qu.: 860.02 1st Qu.: 17.1
## Median : 49.1827 Median : 832.5 Median : 1823.57 Median : 2389.0
## Mean : 71.3931 Mean : 37637.8 Mean : 4212.98 Mean : 8046.4
## 3rd Qu.:134.6262 3rd Qu.: 29818.3 3rd Qu.: 2747.59 3rd Qu.: 9703.1
## Max. :169.0978 Max. :373763.0 Max. :39233.86 Max. :36770.1
## Produksi_Kopi Produksi_Kakao Produksi_Jagung
## Min. : 0.0 Min. : 2.98 Min. : 0.0000
## 1st Qu.: 23.3 1st Qu.: 181.38 1st Qu.: 0.6056
## Median : 181.6 Median : 892.89 Median : 4.9690
## Mean : 672.7 Mean : 1755.84 Mean : 38.5947
## 3rd Qu.:1126.3 3rd Qu.: 1616.72 3rd Qu.: 53.0984
## Max. :3578.5 Max. :12388.46 Max. :241.8159
Splitting Data
set.seed(11)
split_index = createDataPartition(data$Peranan_Ekonomi_Daerah, p = 0.7, list = FALSE)
train_data <- data[split_index,]
test_data <- data[-split_index,]
cat("Training set:", nrow(train_data), "Kabupaten/Kota \n")
## Training set: 15 Kabupaten/Kota
cat("Testing set:", nrow(test_data), "Kabupaten/Kota \n")
## Testing set: 4 Kabupaten/Kota
cat("Rasio Train/Test:", nrow(train_data)/nrow(data),":",
nrow(test_data)/nrow(data))
## Rasio Train/Test: 0.7894737 : 0.2105263
Model Exhaustive Search Selection
predictor_train = train_data[,3:ncol(train_data)]
response_train = train_data$Peranan_Ekonomi_Daerah
best_model = regsubsets(predictor_train, response_train, method = "exhaustive")
## Warning in leaps.setup(x, y, wt = weights, nbest = nbest, nvmax = nvmax, : 6
## linear dependencies found
results = summary(best_model)
results
## Subset selection object
## 20 Variables (and intercept)
## Forced in Forced out
## Pertumbuhan_Ekonomi FALSE FALSE
## PDRB FALSE FALSE
## TPT FALSE FALSE
## Laju_Pertumbuhan_Penduduk FALSE FALSE
## Kepadatan_Penduduk FALSE FALSE
## TPAK FALSE FALSE
## Rata_Rata_Gaji_Pekerja_Formal FALSE FALSE
## Rata_Rata_Gaji_Pekerja_Informal FALSE FALSE
## Indeks_Pembangunan_Literasi_Masyarakat FALSE FALSE
## IPM FALSE FALSE
## Persentase_Penduduk_Miskin FALSE FALSE
## Persentase_Penduduk_Tingkat_Pendidikan_Perguruan_Tinggi FALSE FALSE
## IKP FALSE FALSE
## Produksi_Padi FALSE FALSE
## Produksi_Kelapa_Sawit FALSE FALSE
## Produksi_Kelapa FALSE FALSE
## Produksi_Karet FALSE FALSE
## Produksi_Kopi FALSE FALSE
## Produksi_Kakao FALSE FALSE
## Produksi_Jagung FALSE FALSE
## 1 subsets of each size up to 8
## Selection Algorithm: exhaustive
## Pertumbuhan_Ekonomi PDRB TPT Laju_Pertumbuhan_Penduduk
## 1 ( 1 ) " " "*" " " " "
## 2 ( 1 ) " " "*" " " "*"
## 3 ( 1 ) " " "*" " " "*"
## 4 ( 1 ) " " "*" " " "*"
## 5 ( 1 ) " " "*" " " "*"
## 6 ( 1 ) " " "*" " " "*"
## 7 ( 1 ) " " "*" "*" "*"
## 8 ( 1 ) " " "*" "*" "*"
## Kepadatan_Penduduk TPAK Rata_Rata_Gaji_Pekerja_Formal
## 1 ( 1 ) " " " " " "
## 2 ( 1 ) " " " " " "
## 3 ( 1 ) " " " " " "
## 4 ( 1 ) " " " " " "
## 5 ( 1 ) " " " " " "
## 6 ( 1 ) " " " " " "
## 7 ( 1 ) " " " " " "
## 8 ( 1 ) " " " " "*"
## Rata_Rata_Gaji_Pekerja_Informal Indeks_Pembangunan_Literasi_Masyarakat
## 1 ( 1 ) " " " "
## 2 ( 1 ) " " " "
## 3 ( 1 ) " " " "
## 4 ( 1 ) " " "*"
## 5 ( 1 ) "*" "*"
## 6 ( 1 ) "*" "*"
## 7 ( 1 ) " " "*"
## 8 ( 1 ) " " "*"
## IPM Persentase_Penduduk_Miskin
## 1 ( 1 ) " " " "
## 2 ( 1 ) " " " "
## 3 ( 1 ) " " " "
## 4 ( 1 ) " " " "
## 5 ( 1 ) " " " "
## 6 ( 1 ) " " " "
## 7 ( 1 ) " " " "
## 8 ( 1 ) " " " "
## Persentase_Penduduk_Tingkat_Pendidikan_Perguruan_Tinggi IKP
## 1 ( 1 ) " " " "
## 2 ( 1 ) " " " "
## 3 ( 1 ) " " " "
## 4 ( 1 ) " " " "
## 5 ( 1 ) " " " "
## 6 ( 1 ) "*" " "
## 7 ( 1 ) " " "*"
## 8 ( 1 ) " " "*"
## Produksi_Padi Produksi_Kelapa_Sawit Produksi_Kelapa Produksi_Karet
## 1 ( 1 ) " " " " " " " "
## 2 ( 1 ) " " " " " " " "
## 3 ( 1 ) " " " " " " " "
## 4 ( 1 ) " " " " " " " "
## 5 ( 1 ) " " " " " " " "
## 6 ( 1 ) " " " " " " " "
## 7 ( 1 ) " " " " " " " "
## 8 ( 1 ) " " "*" " " " "
## Produksi_Kopi Produksi_Kakao Produksi_Jagung
## 1 ( 1 ) " " " " " "
## 2 ( 1 ) " " " " " "
## 3 ( 1 ) "*" " " " "
## 4 ( 1 ) "*" " " " "
## 5 ( 1 ) "*" " " " "
## 6 ( 1 ) "*" " " " "
## 7 ( 1 ) "*" " " "*"
## 8 ( 1 ) "*" " " " "
Model Terbaik
best_model_idx = which.min(results$bic)
coef_names <- names(coef(best_model, best_model_idx))
selected_variable <- coef_names[coef_names != "(Intercept)"]
cat("Model Terbaik:", best_model_idx, "Variabel \n")
## Model Terbaik: 8 Variabel
cat("Variabel Terpilih:", paste(selected_variable, collapse = ", "), "\n")
## Variabel Terpilih: PDRB, TPT, Laju_Pertumbuhan_Penduduk, Rata_Rata_Gaji_Pekerja_Formal, Indeks_Pembangunan_Literasi_Masyarakat, IKP, Produksi_Kelapa_Sawit, Produksi_Kopi
cat("BIC Terbaik:", round(min(results$bic),2), "\n")
## BIC Terbaik: -270.98
Model Final
formula_final <- reformulate(termlabels = selected_variable,
response = "Peranan_Ekonomi_Daerah")
print(formula_final)
## Peranan_Ekonomi_Daerah ~ PDRB + TPT + Laju_Pertumbuhan_Penduduk +
## Rata_Rata_Gaji_Pekerja_Formal + Indeks_Pembangunan_Literasi_Masyarakat +
## IKP + Produksi_Kelapa_Sawit + Produksi_Kopi
final_model <- lm(formula_final, data = train_data)
summary(final_model)
##
## Call:
## lm(formula = formula_final, data = train_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.445e-04 -1.601e-04 -4.704e-05 6.857e-05 9.235e-04
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.415e-03 1.440e-03 4.454 0.004308
## PDRB 3.011e-04 1.365e-08 22057.342 < 2e-16
## TPT -1.625e-03 1.925e-04 -8.439 0.000151
## Laju_Pertumbuhan_Penduduk -9.286e-03 5.706e-04 -16.275 3.43e-06
## Rata_Rata_Gaji_Pekerja_Formal -1.998e-09 5.392e-10 -3.706 0.010019
## Indeks_Pembangunan_Literasi_Masyarakat -1.448e-04 1.139e-05 -12.718 1.45e-05
## IKP 3.722e-04 3.335e-05 11.161 3.09e-05
## Produksi_Kelapa_Sawit 1.097e-08 1.654e-09 6.629 0.000568
## Produksi_Kopi -2.519e-06 1.781e-07 -14.145 7.80e-06
##
## (Intercept) **
## PDRB ***
## TPT ***
## Laju_Pertumbuhan_Penduduk ***
## Rata_Rata_Gaji_Pekerja_Formal *
## Indeks_Pembangunan_Literasi_Masyarakat ***
## IKP ***
## Produksi_Kelapa_Sawit ***
## Produksi_Kopi ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000475 on 6 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 2.669e+08 on 8 and 6 DF, p-value: < 2.2e-16
Evaluasi Model
pred_test <- predict(final_model, newdata = test_data)
eval_metrics <- function(actual, predicted) {
e <- actual - predicted
rmse <- sqrt(mean(e^2))
mae <- mean(abs(e))
mape <- mean(abs(e/actual)) * 100
r2 <- 1 - (sum(e^2) / sum((actual - mean(actual))^2))
return(c(RMSE = rmse, MAE = mae, MAPE = mape, R_Squared = r2))
}
performance <- eval_metrics(test_data$Peranan_Ekonomi_Daerah, pred_test)
print("Indikator Kebaikan Model (Data Testing):")
## [1] "Indikator Kebaikan Model (Data Testing):"
print(round(performance, 4))
## RMSE MAE MAPE R_Squared
## 0.0040 0.0037 0.0980 1.0000
Plot Perbandingan Prediksi dan Aktual
plot(test_data$Peranan_Ekonomi_Daerah, pred_test,
main = "Actual vs Predicted (Data Testing)",
xlab = "Aktual", ylab = "Prediksi", pch = 19, col = "blue")
abline(0, 1, col = "red", lwd = 2)
