Load Library
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(pls)
## Warning: package 'pls' was built under R version 4.4.3
##
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
##
## loadings
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.4.3
## Loading required package: Matrix
## Loaded glmnet 4.1-10
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:pls':
##
## R2
library(tidyr)
##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
Load dan Persiapan Data
# Import data
data <- read_excel("Data Penelitian.xlsx", skip = 1)
# Ubah nama kolom agar rapi
colnames(data) <- make.names(colnames(data))
# Struktur data
str(data)
## tibble [38 × 14] (S3: tbl_df/tbl/data.frame)
## $ Provinsi: chr [1:38] "ACEH" "SUMATERA UTARA" "SUMATERA BARAT" "RIAU" ...
## $ Y : num [1:38] 12.22 7.24 5.31 6.3 6.89 ...
## $ X1 : num [1:38] 5.64 5.32 5.62 4.16 4.26 3.69 3.41 4.21 4.45 6.45 ...
## $ X2 : num [1:38] 741980 765842 821552 844678 775092 ...
## $ X3 : num [1:38] 15.4 15.4 18.6 17.4 13.5 ...
## $ X4 : num [1:38] 10.42 10.29 10.03 9.8 9.32 ...
## $ X5 : num [1:38] 99.9 99.9 99.9 100 99.9 ...
## $ X6 : num [1:38] 85.5 80.6 87.5 64.8 74.7 ...
## $ X7 : num [1:38] 99.5 99.6 99.9 99.9 99.9 ...
## $ X8 : num [1:38] 85.2 73.2 73 78.7 89.4 ...
## $ X9 : num [1:38] 92.1 93.2 87.3 92.2 82.5 ...
## $ X10 : num [1:38] 82.2 87.5 74.6 91.2 85.9 ...
## $ X11 : num [1:38] 547511 551093 592911 549211 542466 ...
## $ X12 : num [1:38] 0.274 0.283 0.28 0.304 0.291 0.298 0.339 0.287 0.214 0.385 ...
# Menentukan variabel
Y <- data[[2]] # variabel dependen (kemiskinan)
X <- data[, -c(1, 2)] # variabel independen
# Untuk Ridge
X_matrix <- as.matrix(X)
ANALISIS DESKRIPTIF
# Statistik deskriptif
describe(data)
## vars n mean sd median trimmed mad min
## Provinsi* 1 38 19.50 11.11 19.50 19.50 14.08 1.00
## Y 2 38 10.27 6.27 9.35 9.46 5.29 3.42
## X1 3 38 4.47 1.38 4.21 4.47 1.26 1.49
## X2 4 38 796647.84 142821.18 777366.31 781698.21 112498.65 559895.02
## X3 5 38 14.70 5.85 13.94 14.40 7.43 6.65
## X4 6 38 9.46 1.19 9.57 9.59 0.92 4.76
## X5 7 38 99.33 2.13 99.87 99.86 0.09 90.48
## X6 8 38 84.10 11.32 88.31 85.46 7.52 41.78
## X7 9 38 98.60 3.43 99.66 99.33 0.46 80.02
## X8 10 38 84.93 7.81 85.76 85.72 7.29 54.54
## X9 11 38 87.61 11.61 89.94 89.33 8.67 32.89
## X10 12 38 82.96 14.96 85.99 85.62 7.18 16.34
## X11 13 38 528730.45 112156.80 513739.50 517416.66 109439.60 386850.00
## X12 14 38 0.33 0.05 0.33 0.33 0.06 0.21
## max range skew kurtosis se
## Provinsi* 38.00 37.00 0.00 -1.30 1.80
## Y 29.45 26.03 1.34 1.32 1.02
## X1 6.96 5.47 0.10 -0.62 0.22
## X2 1256746.67 696851.64 1.13 1.77 23168.65
## X3 27.20 20.55 0.34 -1.16 0.95
## X4 11.58 6.82 -1.84 5.20 0.19
## X5 99.97 9.49 -3.75 12.61 0.35
## X6 99.14 57.36 -1.59 3.07 1.84
## X7 100.00 19.98 -4.31 19.96 0.56
## X8 96.61 42.07 -1.55 3.78 1.27
## X9 99.98 67.09 -2.78 10.35 1.88
## X10 98.20 81.86 -2.89 9.34 2.43
## X11 942805.00 555955.00 1.38 2.72 18194.24
## X12 0.43 0.21 0.11 -0.76 0.01
summary(data)
## Provinsi Y X1 X2
## Length:38 Min. : 3.420 Min. :1.490 Min. : 559895
## Class :character 1st Qu.: 5.530 1st Qu.:3.500 1st Qu.: 697439
## Mode :character Median : 9.345 Median :4.210 Median : 777366
## Mean :10.269 Mean :4.468 Mean : 796648
## 3rd Qu.:12.135 3rd Qu.:5.545 3rd Qu.: 848151
## Max. :29.450 Max. :6.960 Max. :1256747
## X3 X4 X5 X6
## Min. : 6.652 Min. : 4.760 Min. :90.48 Min. :41.78
## 1st Qu.: 9.649 1st Qu.: 8.932 1st Qu.:99.80 1st Qu.:80.49
## Median :13.940 Median : 9.575 Median :99.87 Median :88.31
## Mean :14.701 Mean : 9.457 Mean :99.33 Mean :84.10
## 3rd Qu.:20.468 3rd Qu.:10.092 3rd Qu.:99.93 3rd Qu.:91.54
## Max. :27.198 Max. :11.580 Max. :99.97 Max. :99.14
## X7 X8 X9 X10
## Min. : 80.02 Min. :54.54 Min. :32.89 Min. :16.34
## 1st Qu.: 99.07 1st Qu.:81.82 1st Qu.:83.29 1st Qu.:81.15
## Median : 99.67 Median :85.76 Median :89.94 Median :85.99
## Mean : 98.60 Mean :84.93 Mean :87.61 Mean :82.96
## 3rd Qu.: 99.91 3rd Qu.:90.77 3rd Qu.:94.90 3rd Qu.:90.12
## Max. :100.00 Max. :96.61 Max. :99.98 Max. :98.20
## X11 X12
## Min. :386850 Min. :0.2140
## 1st Qu.:445752 1st Qu.:0.2848
## Median :513740 Median :0.3275
## Mean :528730 Mean :0.3284
## 3rd Qu.:590740 3rd Qu.:0.3605
## Max. :942805 Max. :0.4260
stat_desc <- data.frame(
Variabel = names(data)[-1],
Mean = sapply(data[,-1], mean, na.rm = TRUE),
Min = sapply(data[,-1], min, na.rm = TRUE),
Max = sapply(data[,-1], max, na.rm = TRUE),
SD = sapply(data[,-1], sd, na.rm = TRUE)
)
stat_desc
## Variabel Mean Min Max SD
## Y Y 1.026947e+01 3.420000e+00 2.945000e+01 6.268886e+00
## X1 X1 4.468158e+00 1.490000e+00 6.960000e+00 1.381286e+00
## X2 X2 7.966478e+05 5.598950e+05 1.256747e+06 1.428212e+05
## X3 X3 1.470112e+01 6.651936e+00 2.719814e+01 5.850485e+00
## X4 X4 9.457368e+00 4.760000e+00 1.158000e+01 1.189980e+00
## X5 X5 9.932789e+01 9.048000e+01 9.997000e+01 2.127473e+00
## X6 X6 8.410237e+01 4.178000e+01 9.914000e+01 1.132476e+01
## X7 X7 9.860474e+01 8.002000e+01 1.000000e+02 3.426218e+00
## X8 X8 8.493342e+01 5.454000e+01 9.661000e+01 7.807837e+00
## X9 X9 8.760789e+01 3.289000e+01 9.998000e+01 1.161487e+01
## X10 X10 8.295816e+01 1.634000e+01 9.820000e+01 1.496021e+01
## X11 X11 5.287304e+05 3.868500e+05 9.428050e+05 1.121568e+05
## X12 X12 3.284474e-01 2.140000e-01 4.260000e-01 5.080578e-02
ANALISIS OLS + UJI ASUMSI KLASIK
# Model OLS
model_ols <- lm(Y ~ ., data = X)
summary(model_ols)
##
## Call:
## lm(formula = Y ~ ., data = X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2086 -1.4459 0.3424 1.1685 3.5009
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.918e+01 6.544e+01 1.516 0.142181
## X1 1.490e-01 4.246e-01 0.351 0.728654
## X2 -3.831e-05 8.134e-06 -4.709 7.92e-05 ***
## X3 7.068e-02 9.231e-02 0.766 0.450994
## X4 1.109e+00 1.115e+00 0.994 0.329570
## X5 -1.351e+00 5.960e-01 -2.267 0.032323 *
## X6 -1.988e-02 6.977e-02 -0.285 0.778017
## X7 2.247e-01 2.243e-01 1.002 0.325953
## X8 1.461e-01 1.279e-01 1.142 0.264195
## X9 -3.555e-02 7.261e-02 -0.490 0.628692
## X10 -1.709e-01 1.145e-01 -1.494 0.147794
## X11 4.270e-05 9.770e-06 4.370 0.000191 ***
## X12 7.739e+01 1.260e+01 6.142 2.02e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.258 on 25 degrees of freedom
## Multiple R-squared: 0.9123, Adjusted R-squared: 0.8702
## F-statistic: 21.68 on 12 and 25 DF, p-value: 2.922e-10
# Uji Normalitas (Kolmogorov-Smirnov)
residuals_ols <- residuals(model_ols)
ks.test(residuals_ols, "pnorm",
mean(residuals_ols),
sd(residuals_ols))
##
## Exact one-sample Kolmogorov-Smirnov test
##
## data: residuals_ols
## D = 0.097594, p-value = 0.8279
## alternative hypothesis: two-sided
# Uji Multikolinearitas (VIF)
vif(model_ols)
## X1 X2 X3 X4 X5 X6 X7 X8
## 2.496155 9.792465 2.116038 12.776215 11.663974 4.529448 4.283562 7.235075
## X9 X10 X11 X12
## 5.160012 21.271451 8.712569 2.974135
# Uji Heteroskedastisitas (Breusch-Pagan)
bptest(model_ols)
##
## studentized Breusch-Pagan test
##
## data: model_ols
## BP = 11.321, df = 12, p-value = 0.5016
# Uji Autokorelasi (Durbin-Watson)
dwtest(model_ols)
##
## Durbin-Watson test
##
## data: model_ols
## DW = 1.809, p-value = 0.14
## alternative hypothesis: true autocorrelation is greater than 0
PRINCIPAL COMPONENT REGRESSION (PCR)
model_pcr <- pcr(Y ~ ., data = X, scale = TRUE, validation = "CV")
summary(model_pcr)
## Data: X dimension: 38 12
## Y dimension: 38 1
## Fit method: svdpc
## Number of components considered: 12
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 6.353 4.677 5.909 5.519 4.884 4.771 4.906
## adjCV 6.353 4.650 5.809 5.378 4.735 4.643 4.764
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps
## CV 5.032 6.040 6.421 5.817 4.058 4.125
## adjCV 4.932 5.828 6.190 5.541 3.906 3.976
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 42.25 64.66 75.34 84.84 89.77 92.83 95.50 97.55
## Y 48.09 51.09 63.00 77.06 77.06 77.87 77.91 79.79
## 9 comps 10 comps 11 comps 12 comps
## X 98.83 99.35 99.78 100.00
## Y 81.11 88.92 91.15 91.23
validationplot(model_pcr, val.type = "MSEP")

# Ambil jumlah komponen optimal (misal 2)
model_pcr_final <- pcr(Y ~ ., data = X, scale = TRUE, ncomp = 2)
pred_pcr <- predict(model_pcr_final, X, ncomp = 2)
mse_pcr <- mean((Y - pred_pcr)^2)
r2_pcr <- cor(Y, pred_pcr)^2
# Ambil nilai MSEP dari CV
msep_pcr <- MSEP(model_pcr)
# Lihat nilai error tiap komponen
msep_pcr$val
## , , model = (Intercept)
##
## response
## estimate Y
## CV 40.36107
## adjCV 40.36107
##
## , , model = 1 comps
##
## response
## estimate Y
## CV 21.87850
## adjCV 21.62385
##
## , , model = 2 comps
##
## response
## estimate Y
## CV 34.91638
## adjCV 33.74772
##
## , , model = 3 comps
##
## response
## estimate Y
## CV 30.45491
## adjCV 28.91762
##
## , , model = 4 comps
##
## response
## estimate Y
## CV 23.85227
## adjCV 22.42437
##
## , , model = 5 comps
##
## response
## estimate Y
## CV 22.75909
## adjCV 21.55931
##
## , , model = 6 comps
##
## response
## estimate Y
## CV 24.06637
## adjCV 22.69909
##
## , , model = 7 comps
##
## response
## estimate Y
## CV 25.32271
## adjCV 24.32584
##
## , , model = 8 comps
##
## response
## estimate Y
## CV 36.47779
## adjCV 33.96761
##
## , , model = 9 comps
##
## response
## estimate Y
## CV 41.23083
## adjCV 38.31006
##
## , , model = 10 comps
##
## response
## estimate Y
## CV 33.84084
## adjCV 30.70468
##
## , , model = 11 comps
##
## response
## estimate Y
## CV 16.46974
## adjCV 15.25645
##
## , , model = 12 comps
##
## response
## estimate Y
## CV 17.01316
## adjCV 15.80978
# Pilih komponen dengan MSE terkecil
opt_comp_pcr <- which.min(msep_pcr$val[1,1,])
opt_comp_pcr
## 11 comps
## 12
model_pcr_final <- pcr(Y ~ ., data = X,
scale = TRUE,
ncomp = opt_comp_pcr)
RIDGE REGRESSION
ridge_cv <- cv.glmnet(X_matrix, Y, alpha = 0)
best_lambda <- ridge_cv$lambda.min
ridge_model <- glmnet(X_matrix, Y,
alpha = 0,
lambda = best_lambda)
pred_ridge <- predict(ridge_model,
s = best_lambda,
newx = X_matrix)
mse_ridge <- mean((Y - pred_ridge)^2)
r2_ridge <- cor(Y, pred_ridge)^2
# Visualisasi Ridge
plot(ridge_cv)

plot(ridge_model, xvar = "lambda", label = TRUE)

PARTIAL LEAST SQUARES (PLS)
model_pls <- plsr(Y ~ ., data = X,
scale = TRUE, validation = "CV")
summary(model_pls)
## Data: X dimension: 38 12
## Y dimension: 38 1
## Fit method: kernelpls
## Number of components considered: 12
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 6.353 4.937 4.967 4.382 5.132 4.960 4.635
## adjCV 6.353 4.866 4.812 4.283 4.928 4.777 4.437
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps
## CV 4.066 3.650 3.515 3.562 3.524 3.524
## adjCV 3.912 3.531 3.408 3.452 3.417 3.417
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 40.97 54.40 73.87 77.77 85.93 88.22 90.25 92.07
## Y 62.56 78.91 81.01 86.51 88.24 90.65 91.14 91.21
## 9 comps 10 comps 11 comps 12 comps
## X 96.42 97.42 99.70 100.00
## Y 91.22 91.23 91.23 91.23
validationplot(model_pls, val.type = "MSEP")

# Misal 2 komponen
model_pls_final <- plsr(Y ~ ., data = X,
scale = TRUE, ncomp = 2)
pred_pls <- predict(model_pls_final, X, ncomp = 2)
mse_pls <- mean((Y - pred_pls)^2)
r2_pls <- cor(Y, pred_pls)^2
msep_pls <- MSEP(model_pls)
msep_pls$val
## , , model = (Intercept)
##
## response
## estimate Y
## CV 40.36107
## adjCV 40.36107
##
## , , model = 1 comps
##
## response
## estimate Y
## CV 24.37860
## adjCV 23.68137
##
## , , model = 2 comps
##
## response
## estimate Y
## CV 24.66777
## adjCV 23.15085
##
## , , model = 3 comps
##
## response
## estimate Y
## CV 19.19937
## adjCV 18.34565
##
## , , model = 4 comps
##
## response
## estimate Y
## CV 26.33805
## adjCV 24.28509
##
## , , model = 5 comps
##
## response
## estimate Y
## CV 24.6063
## adjCV 22.8155
##
## , , model = 6 comps
##
## response
## estimate Y
## CV 21.48456
## adjCV 19.69090
##
## , , model = 7 comps
##
## response
## estimate Y
## CV 16.53139
## adjCV 15.30136
##
## , , model = 8 comps
##
## response
## estimate Y
## CV 13.32373
## adjCV 12.46593
##
## , , model = 9 comps
##
## response
## estimate Y
## CV 12.35223
## adjCV 11.61237
##
## , , model = 10 comps
##
## response
## estimate Y
## CV 12.68984
## adjCV 11.91622
##
## , , model = 11 comps
##
## response
## estimate Y
## CV 12.41863
## adjCV 11.67566
##
## , , model = 12 comps
##
## response
## estimate Y
## CV 12.41782
## adjCV 11.67476
opt_comp_pls <- which.min(msep_pls$val[1,1,])
opt_comp_pls
## 9 comps
## 10
model_pls_final <- plsr(Y ~ ., data = X,
scale = TRUE,
ncomp = opt_comp_pls)
PERBANDINGAN KINERJA MODEL
hasil <- data.frame(
Model = c("PCR", "Ridge", "PLS"),
MSE = c(mse_pcr, mse_ridge, mse_pls),
R2 = c(r2_pcr, r2_ridge, r2_pls)
)
hasil
## Model MSE R2
## 1 PCR 18.715274 0.5109004
## 2 Ridge 12.913703 0.7492665
## 3 PLS 8.070855 0.7890786
# Urutkan model terbaik
hasil[order(-hasil$R2, hasil$MSE), ]
## Model MSE R2
## 3 PLS 8.070855 0.7890786
## 2 Ridge 12.913703 0.7492665
## 1 PCR 18.715274 0.5109004
INTERPRETASI MODEL (OLS SEBAGAI PEMBANDING)
summary(model_ols)
##
## Call:
## lm(formula = Y ~ ., data = X)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.2086 -1.4459 0.3424 1.1685 3.5009
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.918e+01 6.544e+01 1.516 0.142181
## X1 1.490e-01 4.246e-01 0.351 0.728654
## X2 -3.831e-05 8.134e-06 -4.709 7.92e-05 ***
## X3 7.068e-02 9.231e-02 0.766 0.450994
## X4 1.109e+00 1.115e+00 0.994 0.329570
## X5 -1.351e+00 5.960e-01 -2.267 0.032323 *
## X6 -1.988e-02 6.977e-02 -0.285 0.778017
## X7 2.247e-01 2.243e-01 1.002 0.325953
## X8 1.461e-01 1.279e-01 1.142 0.264195
## X9 -3.555e-02 7.261e-02 -0.490 0.628692
## X10 -1.709e-01 1.145e-01 -1.494 0.147794
## X11 4.270e-05 9.770e-06 4.370 0.000191 ***
## X12 7.739e+01 1.260e+01 6.142 2.02e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.258 on 25 degrees of freedom
## Multiple R-squared: 0.9123, Adjusted R-squared: 0.8702
## F-statistic: 21.68 on 12 and 25 DF, p-value: 2.922e-10
# Variabel signifikan
coef_table <- summary(model_ols)$coefficients
signif_var <- coef_table[coef_table[,4] < 0.05, ]
signif_var
## Estimate Std. Error t value Pr(>|t|)
## X2 -3.830589e-05 8.134105e-06 -4.709293 7.915247e-05
## X5 -1.350795e+00 5.959579e-01 -2.266594 3.232313e-02
## X11 4.269621e-05 9.770210e-06 4.370041 1.907040e-04
## X12 7.739346e+01 1.260154e+01 6.141587 2.021553e-06