# Load the Alibaca index data set directly from its GitHub-hosted CSV file.
alibaca <- rio::import(
  file = "https://raw.githubusercontent.com/lutfi-reza/psd-lutfi-reza/main/data/data_indeks_alibaca_indonesia.csv"
)
Data yang digunakan adalah data Indeks Alibaca tahun 2019 dari 34 provinsi di Indonesia. Data ini terdiri atas satu peubah respon (Y), yaitu Indeks Alibaca Indonesia, dan 16 peubah bebas (X1 sampai X16).
Daftar peubah yang digunakan dalam model regresi:

1. Y: Indeks Alibaca
2. X1: Melek Huruf Latin
3. X2: Rata-rata Lama Sekolah
4. X3: Jumlah Perpustakaan Kondisi Baik
5. X4: Jumlah Petugas Pengelola Perpustakaan Sekolah
6. X5: Jumlah Perpustakaan Umum
7. X6: Jumlah TBM dan Pustaka Bergerak
8. X7: Persentase Rumah Tangga Membeli Surat Kabar/Koran
9. X8: Persentase Rumah Tangga Membeli Majalah/Tabloid
10. X9: Sekolah Memiliki Akses Internet
11. X10: Persentase penduduk mengakses internet
12. X11: Persentase penduduk menggunakan komputer
13. X12: Persentase penduduk membaca surat kabar/koran atau majalah cetak
14. X13: Persentase penduduk membaca buku cetak kitab suci
15. X14: Persentase penduduk membaca berita/artikel dari media elektronik, internet
16. X15: Persentase penduduk mengunjungi perpustakaan
17. X16: Persentase penduduk memanfaatkan taman baca
# Distribution of the response Y (Alibaca index)
hist(alibaca$Y, col = "coral1")
boxplot(alibaca$Y, col = "coral1")
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.1
## corrplot 0.92 loaded
# Pairwise correlation matrix of every column in the data set
m <- cor(alibaca)
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 4.3.0
library(scales)
## Warning: package 'scales' was built under R version 4.3.1
# Ten-step colour ramp, made 80% opaque
ramp_stops <- c(
  "blue4",      # bottom colour
  "#dbdbdb",    # middle colour
  "firebrick3"  # top colour
)
colors <- alpha(colorRampPalette(ramp_stops)(10), alpha = 0.80)
# Lower triangle drawn as ellipses ...
corrplot(
  m,
  method = "ellipse", type = "lower", order = "original",
  col = colors
)
# ... then the upper triangle is added on the same plot as numbers.
corrplot(
  m,
  add = TRUE, method = "number", type = "upper", order = "original",
  tl.pos = "lt", tl.col = "black",
  col = colors
)
# Full model: Y regressed on all sixteen predictors X1..X16.
model <- lm(
  Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 + X12 + X13 +
    X14 + X15 + X16,
  data = alibaca
)
summary(model)
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10 + X11 + X12 + X13 + X14 + X15 + X16, data = alibaca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0181 -0.8184 -0.2245 1.2482 2.2654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.3433705 10.7029372 -0.499 0.62401
## X1 0.2090565 0.1643454 1.272 0.22048
## X2 -0.5296356 0.9320455 -0.568 0.57729
## X3 -0.0001485 0.0011432 -0.130 0.89819
## X4 0.0008446 0.0035603 0.237 0.81532
## X5 0.0016319 0.0013255 1.231 0.23504
## X6 0.0033949 0.0027086 1.253 0.22703
## X7 0.2348260 0.1675447 1.402 0.17904
## X8 0.2020553 0.3490530 0.579 0.57027
## X9 -0.0002589 0.0004798 -0.540 0.59651
## X10 0.2104490 0.2540063 0.829 0.41886
## X11 0.0804281 0.4239930 0.190 0.85180
## X12 -0.0531517 0.1409047 -0.377 0.71068
## X13 0.0405282 0.1830112 0.221 0.82738
## X14 0.4044468 0.2459826 1.644 0.11850
## X15 0.7421982 0.1484849 4.998 0.00011 ***
## X16 -2.4972977 1.4113310 -1.769 0.09475 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.822 on 17 degrees of freedom
## Multiple R-squared: 0.9726, Adjusted R-squared: 0.9468
## F-statistic: 37.68 on 16 and 17 DF, p-value: 3.846e-10
# Variance inflation factor of each predictor in the full model.
print(car::vif(model))
## X1 X2 X3 X4 X5 X6 X7 X8
## 4.014375 8.024387 30.182059 8.022691 3.983604 3.370940 11.933882 9.446274
## X9 X10 X11 X12 X13 X14 X15 X16
## 43.304435 54.948611 41.763450 6.585615 5.590614 33.396454 2.217583 1.965058
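Terlihat bahwa beberapa peubah memiliki nilai VIF > 10, yang menandakan adanya multikolinearitas, yaitu X3, X7, X9, X10, X11, dan X14. Selanjutnya, peubah X9, X10, dan X14 dikeluarkan dari model dan nilai VIF dihitung kembali.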
# Reduced model: same regression with X9, X10 and X14 left out, followed by
# a fresh look at the variance inflation factors.
model4 <- lm(
  Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X11 + X12 + X13 + X15 + X16,
  data = alibaca
)
print(car::vif(model4))
## X1 X2 X3 X4 X5 X6 X7 X8
## 2.747707 6.759762 9.799118 7.392823 2.216318 2.698171 9.711510 9.280422
## X11 X12 X13 X15 X16
## 6.458430 4.412935 3.414155 2.182514 1.654662
Terlihat bahwa tidak terdapat multikolinearitas lagi pada peubah karena seluruh nilai VIF sudah kurang dari 10.
# Model 4: without X9, X10 and X14
## Kolmogorov-Smirnov test
# Compares the residuals of model4 with a normal distribution whose mean and
# standard deviation are taken from those same residuals.
# NOTE(review): the ks.test help page asks for pre-specified parameters, not
# ones estimated from the data, so read this p-value together with the
# Shapiro-Wilk result rather than on its own.
ks.test(
  x = model4$residuals,
  y = "pnorm",
  mean = mean(model4$residuals),
  sd = sd(model4$residuals)
)
##
## Exact one-sample Kolmogorov-Smirnov test
##
## data: model4$residuals
## D = 0.11375, p-value = 0.7288
## alternative hypothesis: two-sided
## Shapiro-Wilk test
# Normality test applied to the residuals of model4.
shapiro.test(x = model4$residuals)
##
## Shapiro-Wilk normality test
##
## data: model4$residuals
## W = 0.97614, p-value = 0.6481
Berdasarkan uji Kolmogorov-Smirnov (p-value = 0.7288) dan uji Shapiro-Wilk (p-value = 0.6481), kedua p-value lebih besar dari 0.05 sehingga sisaan menyebar normal.
# Studentized Breusch-Pagan test for heteroskedasticity in model4.
lmtest::bptest(model4, studentize = TRUE)
##
## studentized Breusch-Pagan test
##
## data: model4
## BP = 12.124, df = 13, p-value = 0.5175
Karena p-value > 0.05 maka ragam sisaan homogen atau tidak terdapat masalah heteroskedastisitas
library(randtests)
## Warning: package 'randtests' was built under R version 4.3.0
# Runs test on the sequence of model4 residuals (randomness check).
runs.test(x = model4$residuals)
##
## Runs Test
##
## data: model4$residuals
## statistic = 0, runs = 18, n1 = 17, n2 = 17, n = 34, p-value = 1
## alternative hypothesis: nonrandomness
Karena p-value > 0.05 maka sisaan saling bebas.
# One-sample t-test of H0: the mean of the model4 residuals is zero.
# Note: model4 is fitted with an intercept, so its least-squares residuals
# average to zero up to rounding error.
t.test(x = model4$residuals, mu = 0, conf.level = 0.95)
##
## One Sample t-test
##
## data: model4$residuals
## t = -5.5823e-17, df = 33, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.5309331 0.5309331
## sample estimates:
## mean of x
## -1.456777e-17
Karena p-value > 0.05 maka nilai harapan sisaan sama dengan nol
# Backward elimination by AIC, starting from model4.
# The scope formula is read from the already-fitted model4 instead of
# refitting the identical regression just to extract its formula; this also
# matches how the forward and stepwise searches define their scope.
bmodelselect <- step(
  model4,
  direction = "backward",
  scope = formula(model4),
  trace = 1
)
## Start: AIC=55.53
## Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X11 + X12 + X13 +
## X15 + X16
##
## Df Sum of Sq RSS AIC
## - X4 1 0.039 76.449 53.549
## - X2 1 0.832 77.242 53.900
## - X8 1 0.953 77.363 53.953
## - X12 1 2.262 78.672 54.524
## - X13 1 3.237 79.647 54.942
## <none> 76.410 55.532
## - X3 1 4.950 81.360 55.666
## - X16 1 6.056 82.466 56.125
## - X6 1 7.388 83.798 56.670
## - X5 1 7.834 84.244 56.850
## - X1 1 10.061 86.471 57.737
## - X7 1 17.121 93.531 60.406
## - X15 1 78.977 155.387 77.665
## - X11 1 83.824 160.234 78.709
##
## Step: AIC=53.55
## Y ~ X1 + X2 + X3 + X5 + X6 + X7 + X8 + X11 + X12 + X13 + X15 +
## X16
##
## Df Sum of Sq RSS AIC
## - X2 1 0.822 77.271 51.913
## - X8 1 0.940 77.388 51.964
## - X12 1 2.389 78.838 52.595
## - X13 1 3.222 79.671 52.952
## <none> 76.449 53.549
## - X16 1 6.136 82.584 54.174
## - X6 1 7.604 84.052 54.773
## - X5 1 7.796 84.245 54.850
## - X1 1 10.034 86.482 55.742
## - X3 1 13.754 90.203 57.174
## - X7 1 17.086 93.535 58.407
## - X11 1 86.626 163.075 77.307
## - X15 1 91.230 167.678 78.253
##
## Step: AIC=51.91
## Y ~ X1 + X3 + X5 + X6 + X7 + X8 + X11 + X12 + X13 + X15 + X16
##
## Df Sum of Sq RSS AIC
## - X8 1 0.786 78.057 50.257
## - X12 1 2.336 79.607 50.925
## <none> 77.271 51.913
## - X13 1 5.366 82.637 52.195
## - X16 1 5.464 82.735 52.236
## - X5 1 7.218 84.489 52.949
## - X6 1 7.519 84.790 53.070
## - X1 1 9.268 86.540 53.764
## - X3 1 13.352 90.623 55.332
## - X7 1 16.354 93.625 56.440
## - X11 1 97.161 174.433 77.596
## - X15 1 98.452 175.723 77.847
##
## Step: AIC=50.26
## Y ~ X1 + X3 + X5 + X6 + X7 + X11 + X12 + X13 + X15 + X16
##
## Df Sum of Sq RSS AIC
## - X12 1 2.105 80.162 49.161
## <none> 78.057 50.257
## - X16 1 4.753 82.810 50.266
## - X13 1 5.403 83.460 50.532
## - X5 1 7.028 85.085 51.188
## - X1 1 8.510 86.567 51.775
## - X6 1 8.535 86.592 51.785
## - X3 1 13.690 91.747 53.751
## - X7 1 42.748 120.805 63.106
## - X15 1 99.715 177.772 76.241
## - X11 1 127.704 205.761 81.212
##
## Step: AIC=49.16
## Y ~ X1 + X3 + X5 + X6 + X7 + X11 + X13 + X15 + X16
##
## Df Sum of Sq RSS AIC
## - X13 1 4.210 84.371 48.901
## <none> 80.162 49.161
## - X16 1 5.066 85.228 49.245
## - X5 1 6.319 86.481 49.741
## - X1 1 7.380 87.541 50.155
## - X6 1 7.595 87.756 50.239
## - X3 1 12.650 92.812 52.143
## - X7 1 81.706 161.867 71.054
## - X15 1 117.429 197.590 77.834
## - X11 1 138.313 218.475 81.251
##
## Step: AIC=48.9
## Y ~ X1 + X3 + X5 + X6 + X7 + X11 + X15 + X16
##
## Df Sum of Sq RSS AIC
## - X5 1 3.327 87.699 48.217
## - X1 1 3.524 87.895 48.293
## - X16 1 5.045 89.417 48.876
## <none> 84.371 48.901
## - X6 1 8.190 92.561 50.051
## - X3 1 8.587 92.958 50.197
## - X7 1 84.949 169.320 70.585
## - X15 1 113.401 197.772 75.866
## - X11 1 134.336 218.708 79.287
##
## Step: AIC=48.22
## Y ~ X1 + X3 + X6 + X7 + X11 + X15 + X16
##
## Df Sum of Sq RSS AIC
## - X1 1 3.886 91.584 47.691
## <none> 87.699 48.217
## - X3 1 5.615 93.314 48.327
## - X16 1 6.940 94.639 48.806
## - X6 1 8.488 96.187 49.358
## - X7 1 81.672 169.370 68.595
## - X15 1 125.689 213.388 76.449
## - X11 1 133.286 220.985 77.639
##
## Step: AIC=47.69
## Y ~ X3 + X6 + X7 + X11 + X15 + X16
##
## Df Sum of Sq RSS AIC
## - X3 1 3.729 95.314 47.048
## - X16 1 5.505 97.089 47.675
## <none> 91.584 47.691
## - X6 1 5.821 97.406 47.786
## - X7 1 85.011 176.595 68.015
## - X15 1 137.414 228.999 76.850
## - X11 1 139.667 231.251 77.183
##
## Step: AIC=47.05
## Y ~ X6 + X7 + X11 + X15 + X16
##
## Df Sum of Sq RSS AIC
## - X6 1 2.166 97.480 45.812
## <none> 95.314 47.048
## - X16 1 6.240 101.554 47.204
## - X7 1 86.492 181.806 67.004
## - X11 1 137.080 232.394 75.351
## - X15 1 143.321 238.635 76.252
##
## Step: AIC=45.81
## Y ~ X7 + X11 + X15 + X16
##
## Df Sum of Sq RSS AIC
## - X16 1 5.645 103.12 45.726
## <none> 97.48 45.812
## - X7 1 85.100 182.58 65.148
## - X15 1 141.395 238.87 74.286
## - X11 1 154.638 252.12 76.120
##
## Step: AIC=45.73
## Y ~ X7 + X11 + X15
##
## Df Sum of Sq RSS AIC
## <none> 103.12 45.726
## - X7 1 86.896 190.02 64.506
## - X15 1 135.882 239.01 72.304
## - X11 1 149.411 252.54 74.177
# Coefficient table and fit statistics of the backward-selected model.
print(summary(bmodelselect))
##
## Call:
## lm(formula = Y ~ X7 + X11 + X15, data = alibaca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9650 -1.3485 -0.0106 1.2989 3.1738
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.51098 1.62113 5.867 2.02e-06 ***
## X7 0.42072 0.08368 5.028 2.15e-05 ***
## X11 0.81674 0.12388 6.593 2.69e-07 ***
## X15 0.74368 0.11828 6.287 6.26e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.854 on 30 degrees of freedom
## Multiple R-squared: 0.9499, Adjusted R-squared: 0.9449
## F-statistic: 189.5 on 3 and 30 DF, p-value: < 2.2e-16
Berdasarkan metode backward, model terbaik adalah model dengan peubah X7, X11, dan X15 dengan $R^2 = 0.9499$.
# Forward selection by AIC: start from the intercept-only model and consider
# adding terms up to the model4 formula.
fmodelselect <- step(
  lm(Y ~ 1, data = alibaca),
  direction = "forward",
  scope = formula(model4),
  trace = 1
)
## Start: AIC=141.5
## Y ~ 1
##
## Df Sum of Sq RSS AIC
## + X11 1 1764.14 293.54 77.293
## + X8 1 1474.98 582.71 100.605
## + X7 1 1448.01 609.68 102.143
## + X12 1 1388.05 669.63 105.333
## + X2 1 1211.11 846.58 113.305
## + X15 1 868.49 1189.20 124.859
## + X1 1 439.58 1618.11 135.330
## + X16 1 324.60 1733.08 137.664
## + X13 1 255.72 1801.96 138.989
## <none> 2057.69 141.501
## + X5 1 32.46 2025.23 142.961
## + X6 1 0.81 2056.88 143.488
## + X4 1 0.50 2057.19 143.493
## + X3 1 0.18 2057.51 143.498
##
## Step: AIC=77.29
## Y ~ X11
##
## Df Sum of Sq RSS AIC
## + X15 1 103.522 190.02 64.506
## + X12 1 64.453 229.09 70.864
## + X7 1 54.537 239.01 72.304
## + X8 1 27.134 266.41 75.995
## <none> 293.54 77.293
## + X13 1 16.518 277.03 77.324
## + X3 1 14.541 279.00 77.565
## + X1 1 12.655 280.89 77.794
## + X2 1 11.402 282.14 77.946
## + X6 1 4.778 288.76 78.735
## + X4 1 4.518 289.03 78.765
## + X16 1 0.750 292.79 79.206
## + X5 1 0.001 293.54 79.292
##
## Step: AIC=64.51
## Y ~ X11 + X15
##
## Df Sum of Sq RSS AIC
## + X7 1 86.896 103.12 45.726
## + X8 1 44.669 145.35 57.395
## + X12 1 38.032 151.99 58.913
## <none> 190.02 64.506
## + X2 1 9.576 180.45 64.748
## + X16 1 7.441 182.58 65.148
## + X3 1 6.291 183.73 65.362
## + X4 1 6.192 183.83 65.380
## + X1 1 3.012 187.01 65.963
## + X6 1 1.295 188.73 66.274
## + X5 1 0.843 189.18 66.355
## + X13 1 0.092 189.93 66.490
##
## Step: AIC=45.73
## Y ~ X11 + X15 + X7
##
## Df Sum of Sq RSS AIC
## <none> 103.12 45.726
## + X16 1 5.6448 97.48 45.812
## + X5 1 2.4430 100.68 46.911
## + X6 1 1.5710 101.55 47.204
## + X12 1 0.5327 102.59 47.550
## + X1 1 0.5068 102.62 47.558
## + X3 1 0.2993 102.83 47.627
## + X4 1 0.1743 102.95 47.668
## + X2 1 0.0824 103.04 47.699
## + X8 1 0.0228 103.10 47.718
## + X13 1 0.0172 103.11 47.720
# Coefficient table and fit statistics of the forward-selected model.
print(summary(fmodelselect))
##
## Call:
## lm(formula = Y ~ X11 + X15 + X7, data = alibaca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9650 -1.3485 -0.0106 1.2989 3.1738
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.51098 1.62113 5.867 2.02e-06 ***
## X11 0.81674 0.12388 6.593 2.69e-07 ***
## X15 0.74368 0.11828 6.287 6.26e-07 ***
## X7 0.42072 0.08368 5.028 2.15e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.854 on 30 degrees of freedom
## Multiple R-squared: 0.9499, Adjusted R-squared: 0.9449
## F-statistic: 189.5 on 3 and 30 DF, p-value: < 2.2e-16
Hasil forward selection menunjukkan bahwa model terbaik adalah model dengan peubah X11, X15, dan X7 dengan $R^2 = 0.9499$. Hasil ini sama dengan hasil backward selection.
# Stepwise selection
# Starts from the intercept-only model; at each step terms may be added (up
# to the model4 formula) or dropped again, judged by AIC.
smodelselect <- step(
  lm(Y ~ 1, data = alibaca),
  direction = "both",
  scope = formula(model4),
  trace = 1
)
## Start: AIC=141.5
## Y ~ 1
##
## Df Sum of Sq RSS AIC
## + X11 1 1764.14 293.54 77.293
## + X8 1 1474.98 582.71 100.605
## + X7 1 1448.01 609.68 102.143
## + X12 1 1388.05 669.63 105.333
## + X2 1 1211.11 846.58 113.305
## + X15 1 868.49 1189.20 124.859
## + X1 1 439.58 1618.11 135.330
## + X16 1 324.60 1733.08 137.664
## + X13 1 255.72 1801.96 138.989
## <none> 2057.69 141.501
## + X5 1 32.46 2025.23 142.961
## + X6 1 0.81 2056.88 143.488
## + X4 1 0.50 2057.19 143.493
## + X3 1 0.18 2057.51 143.498
##
## Step: AIC=77.29
## Y ~ X11
##
## Df Sum of Sq RSS AIC
## + X15 1 103.52 190.02 64.506
## + X12 1 64.45 229.09 70.864
## + X7 1 54.54 239.01 72.304
## + X8 1 27.13 266.41 75.995
## <none> 293.54 77.293
## + X13 1 16.52 277.03 77.324
## + X3 1 14.54 279.00 77.565
## + X1 1 12.66 280.89 77.794
## + X2 1 11.40 282.14 77.946
## + X6 1 4.78 288.77 78.735
## + X4 1 4.52 289.03 78.765
## + X16 1 0.75 292.79 79.206
## + X5 1 0.00 293.54 79.292
## - X11 1 1764.14 2057.69 141.501
##
## Step: AIC=64.51
## Y ~ X11 + X15
##
## Df Sum of Sq RSS AIC
## + X7 1 86.90 103.12 45.726
## + X8 1 44.67 145.35 57.395
## + X12 1 38.03 151.99 58.913
## <none> 190.02 64.506
## + X2 1 9.58 180.45 64.748
## + X16 1 7.44 182.58 65.148
## + X3 1 6.29 183.73 65.362
## + X4 1 6.19 183.83 65.380
## + X1 1 3.01 187.01 65.963
## + X6 1 1.29 188.73 66.274
## + X5 1 0.84 189.18 66.355
## + X13 1 0.09 189.93 66.490
## - X15 1 103.52 293.54 77.293
## - X11 1 999.18 1189.20 124.859
##
## Step: AIC=45.73
## Y ~ X11 + X15 + X7
##
## Df Sum of Sq RSS AIC
## <none> 103.12 45.726
## + X16 1 5.645 97.48 45.812
## + X5 1 2.443 100.68 46.911
## + X6 1 1.571 101.55 47.204
## + X12 1 0.533 102.59 47.550
## + X1 1 0.507 102.62 47.558
## + X3 1 0.299 102.83 47.627
## + X4 1 0.174 102.95 47.668
## + X2 1 0.082 103.04 47.699
## + X8 1 0.023 103.10 47.718
## + X13 1 0.017 103.11 47.720
## - X7 1 86.896 190.02 64.506
## - X15 1 135.882 239.01 72.304
## - X11 1 149.411 252.54 74.177
# Coefficient table and fit statistics of the stepwise-selected model.
print(summary(smodelselect))
##
## Call:
## lm(formula = Y ~ X11 + X15 + X7, data = alibaca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9650 -1.3485 -0.0106 1.2989 3.1738
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.51098 1.62113 5.867 2.02e-06 ***
## X11 0.81674 0.12388 6.593 2.69e-07 ***
## X15 0.74368 0.11828 6.287 6.26e-07 ***
## X7 0.42072 0.08368 5.028 2.15e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.854 on 30 degrees of freedom
## Multiple R-squared: 0.9499, Adjusted R-squared: 0.9449
## F-statistic: 189.5 on 3 and 30 DF, p-value: < 2.2e-16
Hasil stepwise selection sama dengan hasil metode backward dan forward selection, yaitu model terbaik adalah model dengan peubah X11, X15, dan X7 dengan $R^2 = 0.9499$.
# Predictor matrix: every column except the response Y. The response is
# excluded by name so the result does not rely on Y being the first column.
matrix_X <- data.matrix(alibaca[, setdiff(names(alibaca), "Y"), drop = FALSE])
# Response as a one-column matrix.
matrix_Y <- matrix(alibaca$Y)
alpha_ridge <- 0  # alpha = 0 selects the ridge penalty in glmnet
# cv.glmnet assigns observations to cross-validation folds at random. Fixing
# the seed keeps the selected lambda, the coefficients and the R-squared
# identical from one run to the next, so reported numbers stay valid.
set.seed(123)
model_Ridge <- glmnet::cv.glmnet(matrix_X, matrix_Y, alpha = alpha_ridge)
# Lists the components stored in the cross-validation object.
summary(model_Ridge)
## Length Class Mode
## lambda 100 -none- numeric
## cvm 100 -none- numeric
## cvsd 100 -none- numeric
## cvup 100 -none- numeric
## cvlo 100 -none- numeric
## nzero 100 -none- numeric
## call 4 -none- call
## name 1 -none- character
## glmnet.fit 12 elnet list
## lambda.min 1 -none- numeric
## lambda.1se 1 -none- numeric
## index 2 -none- numeric
# Ridge regression result
print(x = model_Ridge)
##
## Call: glmnet::cv.glmnet(x = matrix_X, y = matrix_Y, alpha = alpha_ridge)
##
## Measure: Mean-Squared Error
##
## Lambda Index Measure SE Nonzero
## min 2.205 88 6.944 1.743 16
## 1se 8.900 73 8.437 1.553 16
# Take the lambda with the smallest cross-validated error
best_lambda <- model_Ridge[["lambda.min"]]
cat("Lambda terbaik:", best_lambda, "\n")
## Lambda terbaik: 2.204584
# Fitted values of the ridge model at that lambda, on the training matrix
predictions <- predict(
  object = model_Ridge,
  newx = matrix_X,
  s = best_lambda
)
# Ridge coefficients at that lambda
coef(object = model_Ridge, s = best_lambda)
## 17 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 6.493880e-01
## X1 9.867823e-02
## X2 4.212397e-01
## X3 -2.117023e-04
## X4 6.976193e-04
## X5 5.388473e-04
## X6 1.565883e-03
## X7 1.463658e-01
## X8 3.128072e-01
## X9 -5.030915e-05
## X10 1.500893e-01
## X11 2.522059e-01
## X12 1.102334e-01
## X13 3.680095e-02
## X14 1.703353e-01
## X15 5.405333e-01
## X16 -8.080940e-01
# In-sample R-squared of the ridge fit: 1 - (residual SS / total SS)
rss_ridge <- sum((matrix_Y - predictions)^2)
tss_ridge <- sum((matrix_Y - mean(matrix_Y))^2)
r_squared_ridge <- 1 - rss_ridge / tss_ridge
r_squared_ridge
## [1] 0.9583421
Pada regresi Ridge, tidak terdapat peubah yang dihilangkan; semua peubah dimasukkan ke dalam model. Nilai R-squared yang diperoleh dengan regresi Ridge adalah $R^2 = 0.9583421$.
alpha_Lasso <- 1  # alpha = 1 selects the lasso penalty in glmnet
# cv.glmnet assigns observations to cross-validation folds at random. Fixing
# the seed keeps the selected lambda, the set of non-zero coefficients and
# the R-squared identical from one run to the next.
set.seed(123)
model_Lasso <- glmnet::cv.glmnet(matrix_X, matrix_Y, alpha = alpha_Lasso)
# Cross-validation summary (lambda.min and lambda.1se rows).
model_Lasso
##
## Call: glmnet::cv.glmnet(x = matrix_X, y = matrix_Y, alpha = alpha_Lasso)
##
## Measure: Mean-Squared Error
##
## Lambda Index Measure SE Nonzero
## min 0.0911 48 4.735 0.6926 12
## 1se 0.6426 27 5.329 1.2802 6
# Lasso regression result
print(x = model_Lasso)
##
## Call: glmnet::cv.glmnet(x = matrix_X, y = matrix_Y, alpha = alpha_Lasso)
##
## Measure: Mean-Squared Error
##
## Lambda Index Measure SE Nonzero
## min 0.0911 48 4.735 0.6926 12
## 1se 0.6426 27 5.329 1.2802 6
# Take the lambda with the smallest cross-validated error
best_lambdal <- model_Lasso[["lambda.min"]]
cat("Lambda terbaik:", best_lambdal, "\n")
## Lambda terbaik: 0.09109367
# Fitted values of the lasso model at that lambda, on the training matrix
predictions_lasso <- predict(
  object = model_Lasso,
  newx = matrix_X,
  s = best_lambdal
)
# Lasso coefficients at that lambda ("." marks a coefficient shrunk to zero)
coef(object = model_Lasso, s = best_lambdal)
## 17 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 3.3592458805
## X1 0.0716148842
## X2 .
## X3 -0.0001877572
## X4 .
## X5 0.0004775011
## X6 0.0006912686
## X7 0.1818394960
## X8 0.1763347932
## X9 .
## X10 0.2735023546
## X11 0.0698529659
## X12 .
## X13 0.0696808690
## X14 0.2866695470
## X15 0.7354839379
## X16 -1.7087881462
# In-sample R-squared of the lasso fit: 1 - (residual SS / total SS)
r_squared_lasso <- 1 -
  sum((matrix_Y - predictions_lasso)^2) / sum((matrix_Y - mean(matrix_Y))^2)
# Display the value, as is done for r_squared_ridge, so the figure quoted in
# the text can be checked against the output.
r_squared_lasso
Melalui regresi Lasso terlihat bahwa beberapa peubah tidak dimasukkan ke dalam model karena koefisiennya disusutkan menjadi nol, yakni peubah X2, X4, X9, dan X12. R-squared yang diperoleh dengan regresi Lasso adalah $R^2 = 0.9618233$.
Jika ketiga pendekatan dibandingkan berdasarkan nilai R-squared, regresi Lasso memberikan nilai tertinggi ($R^2 = 0.9618233$), diikuti regresi Ridge ($R^2 = 0.9583421$) dan model hasil seleksi peubah ($R^2 = 0.9499$). Regresi Ridge masih melibatkan semua peubah, tanpa ada yang dihilangkan atau tidak dimasukkan ke dalam model. Sebaliknya, metode seleksi peubah dan regresi Lasso menghilangkan beberapa peubah; regresi Lasso tetap memiliki R-squared tertinggi walaupun telah menghilangkan 4 peubah.
Dengan ini disimpulkan bahwa Regresi Lasso jauh lebih baik untuk menangani model dengan peubah terdeteksi multikolinearitas tinggi jika dibandingkan dengan metode Variable Selection dan Regresi Ridge.
Model regresi Lasso yang diperoleh adalah \[Y = 3.3592458805+0.0716148842X_{1}-0.0001877572X_{3}+0.0004775011X_{5}+0.0006912686X_{6}+0.1818394960X_{7}+0.1763347932X_{8}+0.2735023546X_{10}+0.0698529659X_{11}+0.0696808690X_{13}+0.2866695470X_{14}+0.7354839379X_{15}-1.7087881462X_{16}\]
Model menjelaskan bahwa setiap penambahan satu satuan X1, X5, X6, X7, X8, X10, X11, X13, X14, atau X15, dengan peubah lain dianggap tetap, akan meningkatkan peubah respon (Indeks Alibaca) sebesar koefisien masing-masing peubah. Sebaliknya, setiap penambahan satu satuan X3 atau X16 akan menurunkan Indeks Alibaca masing-masing sebesar 0.0001877572 dan 1.7087881462 satuan.