#Memuat Pustaka Yang dimuat
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.4.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(pls)
## Warning: package 'pls' was built under R version 4.4.3
##
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
##
## loadings
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.4.3
## Loading required package: Matrix
## Loaded glmnet 4.1-10
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:pls':
##
## R2
library(tidyr)
##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
# Membaca dataset
data <- read_excel("Data Kemiskinan 2025.xlsx", skip = 1)
# Ubah nama kolom agar rapi
colnames(data) <- make.names(colnames(data))
# Struktur data
str(data)
## tibble [40 × 14] (S3: tbl_df/tbl/data.frame)
## $ Provinsi: chr [1:40] "ACEH" "SUMATERA UTARA" "SUMATERA BARAT" "RIAU" ...
## $ Y : num [1:40] 12.22 7.24 5.31 6.3 6.89 ...
## $ X1 : num [1:40] 5.64 5.32 5.62 4.16 4.26 3.69 3.41 4.21 4.45 6.45 ...
## $ X2 : num [1:40] 741980 765842 821552 844678 775092 ...
## $ X3 : num [1:40] 15.4 15.4 18.6 17.4 13.5 ...
## $ X4 : num [1:40] 10.42 10.29 10.03 9.8 9.32 ...
## $ X5 : num [1:40] 98.9 99 99.1 99 97.6 ...
## $ X6 : num [1:40] 85.5 80.6 87.5 64.8 74.7 ...
## $ X7 : num [1:40] 99.5 99.6 99.9 99.9 99.9 ...
## $ X8 : num [1:40] 85.2 73.2 73 78.7 89.4 ...
## $ X9 : num [1:40] 92.1 93.2 87.3 92.2 82.5 ...
## $ X10 : num [1:40] 82.2 87.5 74.6 91.2 85.9 ...
## $ X11 : num [1:40] 676247 666546 729806 713117 664127 ...
## $ X12 : num [1:40] 0.274 0.283 0.28 0.304 0.291 0.298 0.339 0.287 0.214 0.385 ...
# Menentukan variabel
Y <- data[[2]] # variabel dependen (kemiskinan)
X <- data[, -c(1, 2)] # variabel independen
# Untuk Ridge
X_matrix <- as.matrix(X)
#Melihat data
head (data)
## # A tibble: 6 × 14
## Provinsi Y X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ACEH 12.2 5.64 7.42e5 15.4 10.4 98.9 85.5 99.5 85.2 92.1 82.2
## 2 SUMATERA U… 7.24 5.32 7.66e5 15.4 10.3 99.0 80.6 99.6 73.2 93.2 87.5
## 3 SUMATERA B… 5.31 5.62 8.22e5 18.6 10.0 99.1 87.4 99.9 73.0 87.3 74.6
## 4 RIAU 6.3 4.16 8.45e5 17.4 9.8 99 64.8 99.9 78.7 92.2 91.2
## 5 JAMBI 6.89 4.26 7.75e5 13.5 9.32 97.6 74.7 99.9 89.4 82.5 85.9
## 6 SUMATERA S… 9.85 3.69 6.91e5 13.5 9.28 98.9 80.4 99.0 85.9 86.8 85.5
## # ℹ 2 more variables: X11 <dbl>, X12 <dbl>
# Statistik deskriptif
describe(data)
## vars n mean sd median trimmed mad min
## Provinsi* 1 38 19.50 11.11 19.50 19.50 14.08 1.00
## Y 2 40 10.58 6.92 9.35 9.46 5.63 3.42
## X1 3 40 4.46 1.48 4.21 4.47 1.33 1.49
## X2 4 40 802231.49 161818.01 777366.31 781698.21 123001.16 559895.02
## X3 5 40 14.81 6.17 13.94 14.40 8.25 6.65
## X4 6 40 9.39 1.42 9.57 9.59 1.01 4.76
## X5 7 40 95.96 6.93 98.17 97.69 1.38 68.32
## X6 8 40 83.42 13.15 88.31 85.46 8.10 41.78
## X7 9 40 98.17 4.46 99.66 99.33 0.47 80.02
## X8 10 40 84.47 9.21 85.76 85.72 7.64 54.54
## X9 11 40 86.55 14.41 89.94 89.33 8.84 32.89
## X10 12 40 81.67 18.18 85.99 85.62 7.53 16.34
## X11 13 40 682779.58 162634.12 658231.50 665247.22 149437.18 475488.00
## X12 14 40 0.33 0.06 0.33 0.33 0.06 0.21
## max range skew kurtosis se
## Provinsi* 38.00 37.00 0.00 -1.30 1.80
## Y 29.45 26.03 1.32 1.03 1.09
## X1 6.96 5.47 0.02 -0.69 0.23
## X2 1256746.67 696851.64 1.13 1.33 25585.67
## X3 27.20 20.55 0.38 -1.13 0.98
## X4 11.58 6.82 -1.71 3.61 0.22
## X5 99.80 31.48 -3.22 9.82 1.09
## X6 99.14 57.36 -1.61 2.55 2.08
## X7 100.00 19.98 -3.39 10.83 0.70
## X8 96.61 42.07 -1.60 3.02 1.46
## X9 99.98 67.09 -2.52 6.80 2.28
## X10 98.20 81.86 -2.54 6.16 2.87
## X11 1132178.00 656690.00 1.01 0.68 25714.71
## X12 0.43 0.21 0.02 -0.74 0.01
summary(data)
## Provinsi Y X1 X2
## Length:40 Min. : 3.420 Min. :1.490 Min. : 559895
## Class :character 1st Qu.: 5.500 1st Qu.:3.450 1st Qu.: 688193
## Mode :character Median : 9.345 Median :4.210 Median : 777366
## Mean :10.578 Mean :4.456 Mean : 802232
## 3rd Qu.:12.320 3rd Qu.:5.625 3rd Qu.: 851277
## Max. :29.450 Max. :6.960 Max. :1256747
## X3 X4 X5 X6
## Min. : 6.652 Min. : 4.760 Min. :68.32 Min. :41.78
## 1st Qu.: 9.611 1st Qu.: 8.902 1st Qu.:95.93 1st Qu.:79.39
## Median :13.940 Median : 9.575 Median :98.17 Median :88.31
## Mean :14.812 Mean : 9.393 Mean :95.96 Mean :83.42
## 3rd Qu.:20.714 3rd Qu.:10.148 3rd Qu.:98.97 3rd Qu.:91.72
## Max. :27.198 Max. :11.580 Max. :99.80 Max. :99.14
## X7 X8 X9 X10
## Min. : 80.02 Min. :54.54 Min. :32.89 Min. :16.34
## 1st Qu.: 99.03 1st Qu.:81.01 1st Qu.:83.16 1st Qu.:80.50
## Median : 99.67 Median :85.76 Median :89.94 Median :85.99
## Mean : 98.17 Mean :84.47 Mean :86.55 Mean :81.67
## 3rd Qu.: 99.92 3rd Qu.:90.89 3rd Qu.:95.11 3rd Qu.:90.77
## Max. :100.00 Max. :96.61 Max. :99.98 Max. :98.20
## X11 X12
## Min. : 475488 Min. :0.2140
## 1st Qu.: 557733 1st Qu.:0.2838
## Median : 658232 Median :0.3275
## Mean : 682780 Mean :0.3280
## 3rd Qu.: 771358 3rd Qu.:0.3618
## Max. :1132178 Max. :0.4260
stat_desc <- data.frame(
Variabel = names(data)[-1],
Mean = sapply(data[,-1], mean, na.rm = TRUE),
Min = sapply(data[,-1], min, na.rm = TRUE),
Max = sapply(data[,-1], max, na.rm = TRUE),
SD = sapply(data[,-1], sd, na.rm = TRUE)
)
stat_desc
## Variabel Mean Min Max SD
## Y Y 1.057775e+01 3.420000e+00 2.945000e+01 6.915362e+00
## X1 X1 4.456000e+00 1.490000e+00 6.960000e+00 1.482089e+00
## X2 X2 8.022315e+05 5.598950e+05 1.256747e+06 1.618180e+05
## X3 X3 1.481232e+01 6.651936e+00 2.719814e+01 6.174623e+00
## X4 X4 9.393000e+00 4.760000e+00 1.158000e+01 1.421439e+00
## X5 X5 9.595650e+01 6.832000e+01 9.980000e+01 6.925246e+00
## X6 X6 8.342025e+01 4.178000e+01 9.914000e+01 1.314998e+01
## X7 X7 9.817500e+01 8.002000e+01 1.000000e+02 4.455749e+00
## X8 X8 8.446550e+01 5.454000e+01 9.661000e+01 9.208344e+00
## X9 X9 8.654925e+01 3.289000e+01 9.998000e+01 1.440600e+01
## X10 X10 8.167375e+01 1.634000e+01 9.820000e+01 1.817662e+01
## X11 X11 6.827796e+05 4.754880e+05 1.132178e+06 1.626341e+05
## X12 X12 3.280250e-01 2.140000e-01 4.260000e-01 5.503215e-02
# Standarisasi data (Z-score)
data_model <- data.frame(Y, X)
data_std <- as.data.frame(scale(data_model))
head(data_model)
## Y X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11
## 1 12.22 5.64 741979.7 15.35910 10.42 98.92 85.52 99.52 85.21 92.06 82.21 676247
## 2 7.24 5.32 765842.1 15.36517 10.29 99.01 80.62 99.64 73.16 93.24 87.47 666546
## 3 5.31 5.62 821551.7 18.61072 10.03 99.08 87.45 99.91 73.02 87.26 74.59 729806
## 4 6.30 4.16 844677.8 17.41404 9.80 99.00 64.82 99.88 78.69 92.17 91.21 713117
## 5 6.89 4.26 775091.8 13.54359 9.32 97.59 74.69 99.87 89.38 82.46 85.88 664127
## 6 9.85 3.69 691255.8 13.51207 9.28 98.89 80.45 99.05 85.93 86.82 85.50 581702
## X12
## 1 0.274
## 2 0.283
## 3 0.280
## 4 0.304
## 5 0.291
## 6 0.298
# Model OLS
data_model <- data.frame(Y, X)
model_ols <- lm(Y ~ ., data = data_model)
summary(model_ols)
##
## Call:
## lm(formula = Y ~ ., data = data_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8670 -1.2276 -0.2296 1.0510 4.8260
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.739e+01 1.878e+01 -3.056 0.00500 **
## X1 -1.041e-01 4.973e-01 -0.209 0.83572
## X2 -3.175e-05 8.800e-06 -3.609 0.00123 **
## X3 1.781e-01 9.813e-02 1.815 0.08064 .
## X4 9.647e-01 1.352e+00 0.714 0.48148
## X5 -1.582e-01 2.777e-01 -0.570 0.57349
## X6 -7.068e-02 8.211e-02 -0.861 0.39692
## X7 3.988e-01 3.267e-01 1.221 0.23282
## X8 3.709e-01 1.254e-01 2.959 0.00636 **
## X9 1.065e-01 7.032e-02 1.515 0.14146
## X10 -3.741e-01 1.039e-01 -3.602 0.00126 **
## X11 3.866e-05 8.080e-06 4.785 5.43e-05 ***
## X12 8.460e+01 1.343e+01 6.297 9.70e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.657 on 27 degrees of freedom
## Multiple R-squared: 0.8978, Adjusted R-squared: 0.8524
## F-statistic: 19.77 on 12 and 27 DF, p-value: 2.566e-10
# Uji Normalitas (Kolmogorov-Smirnov)
residuals_ols <- residuals(model_ols)
ks.test(residuals_ols, "pnorm",
mean(residuals_ols),
sd(residuals_ols))
##
## Exact one-sample Kolmogorov-Smirnov test
##
## data: residuals_ols
## D = 0.085947, p-value = 0.9045
## alternative hypothesis: two-sided
# Uji Heteroskedastisitas (Breusch-Pagan)
bptest(model_ols)
##
## studentized Breusch-Pagan test
##
## data: model_ols
## BP = 12.071, df = 12, p-value = 0.44
# Uji Autokorelasi (Durbin-Watson)
dwtest(model_ols)
##
## Durbin-Watson test
##
## data: model_ols
## DW = 1.8631, p-value = 0.2169
## alternative hypothesis: true autocorrelation is greater than 0
# Uji Multikolinearitas (VIF)
vif(model_ols)
## X1 X2 X3 X4 X5 X6 X7 X8
## 3.001873 11.205014 2.028671 20.395120 20.435372 6.442185 11.712765 7.362831
## X9 X10 X11 X12
## 5.670611 19.697793 9.543295 3.020315
Analisis Principal Component Regression (PCR) dilakukan dengan menggunakan seluruh variabel prediktor untuk memodelkan variabel respon. Data terlebih dahulu distandarisasi, kemudian digunakan metode cross-validation untuk menentukan jumlah komponen yang paling optimal. Pemilihan jumlah komponen ini didasarkan pada nilai MSEP terkecil yang diperoleh dari grafik.
Setelah jumlah komponen terbaik didapatkan, model PCR dibentuk kembali dan digunakan untuk melakukan prediksi. Kinerja model kemudian dievaluasi menggunakan nilai MSE untuk melihat besar kesalahan, serta R² untuk mengetahui seberapa baik model menjelaskan data. Dengan cara ini, PCR dapat membantu mengatasi masalah multikolinearitas sekaligus menghasilkan model yang cukup baik.
model_pcr <- pcr(Y ~ .,
data = X,
scale = TRUE,
validation = "CV")
summary(model_pcr)
## Data: X dimension: 40 12
## Y dimension: 40 1
## Fit method: svdpc
## Number of components considered: 12
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 7.003 7.887 8.557 4.171 4.476 4.528 4.676
## adjCV 7.003 7.877 8.667 4.054 4.482 4.451 4.592
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps
## CV 4.698 5.022 5.684 5.261 4.555 3.503
## adjCV 4.624 4.928 5.542 5.030 4.410 3.428
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 41.502 59.954 75.62 84.03 91.06 94.95 96.62 98.18
## Y 2.735 6.996 69.52 69.63 76.78 76.82 76.85 77.20
## 9 comps 10 comps 11 comps 12 comps
## X 99.13 99.52 99.82 100.00
## Y 79.91 87.72 88.02 89.78
msep_pcr <- MSEP(model_pcr)
# nilai MSEP tiap komponen
msep_values <- as.vector(msep_pcr$val[1,1,])
# buang komponen ke-0 (intercept)
msep_values <- msep_values[-1]
msep_values
## [1] 62.19916 73.21433 17.39516 20.03615 20.50385 21.86563 22.06924 25.22285
## [9] 32.30640 27.67562 20.74612 12.27422
opt_comp_pcr <- which.min(msep_values)
opt_comp_pcr
## [1] 12
model_pcr_final <- pcr(Y ~ .,
data = X,
scale = TRUE,
ncomp = opt_comp_pcr)
pred_pcr <- predict(model_pcr_final,
newdata = X,
ncomp = opt_comp_pcr)
pred_pcr <- as.vector(pred_pcr)
validationplot(model_pcr, val.type = "MSEP")
mse_pcr <- mean((Y - pred_pcr)^2)
r2_pcr <- cor(Y, pred_pcr)^2
mse_pcr
## [1] 4.763853
loadings(model_pcr)
##
## Loadings:
## Comp 1 Comp 2 Comp 3 Comp 4 Comp 5 Comp 6 Comp 7 Comp 8 Comp 9 Comp 10
## X1 -0.281 0.318 -0.270 -0.799 -0.233
## X2 0.626 -0.176 0.194 0.177 0.121 -0.654
## X3 -0.178 -0.130 -0.270 0.551 -0.587 0.345 -0.241 0.194 -0.120
## X4 -0.418 0.126 -0.105 0.137 0.149 -0.617
## X5 -0.429 -0.253 0.165 0.177 0.238
## X6 -0.190 -0.641 -0.191 0.219 0.539 -0.242 -0.168
## X7 -0.416 0.172 -0.337 0.418 0.457 -0.142
## X8 -0.255 -0.563 0.453 -0.152 -0.258 -0.439
## X9 -0.402 -0.177 0.795 -0.158 0.322
## X10 -0.406 0.102 0.282 0.243 -0.331 -0.394
## X11 0.556 -0.248 0.347 0.228 0.129 0.137 0.135 0.597
## X12 -0.136 0.208 -0.262 -0.715 -0.382 0.224 -0.224 -0.156 0.278
## Comp 11 Comp 12
## X1 0.172
## X2 -0.247
## X3
## X4 0.594
## X5 -0.727 -0.281
## X6 -0.262
## X7 0.489 0.161
## X8 0.335
## X9 0.166
## X10 0.290 -0.565
## X11 0.196
## X12
##
## Comp 1 Comp 2 Comp 3 Comp 4 Comp 5 Comp 6 Comp 7 Comp 8 Comp 9
## SS loadings 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000 1.000
## Proportion Var 0.083 0.083 0.083 0.083 0.083 0.083 0.083 0.083 0.083
## Cumulative Var 0.083 0.167 0.250 0.333 0.417 0.500 0.583 0.667 0.750
## Comp 10 Comp 11 Comp 12
## SS loadings 1.000 1.000 1.000
## Proportion Var 0.083 0.083 0.083
## Cumulative Var 0.833 0.917 1.000
coef(model_pcr_final,
ncomp = opt_comp_pcr,
intercept = TRUE)
## , , 12 comps
##
## Y
## (Intercept) -57.3929215
## X1 -0.1543220
## X2 -5.1385050
## X3 1.0997331
## X4 1.3712985
## X5 -1.0958471
## X6 -0.9294381
## X7 1.7769425
## X8 3.4151282
## X9 1.5344308
## X10 -6.8001704
## X11 6.2880235
## X12 4.6555563
ridge_cv <- cv.glmnet(X_matrix, Y, alpha = 0)
best_lambda <- ridge_cv$lambda.min
ridge_model <- glmnet(X_matrix, Y,
alpha = 0,
lambda = best_lambda)
pred_ridge <- predict(ridge_model,
s = best_lambda,
newx = X_matrix)
mse_ridge <- mean((Y - pred_ridge)^2)
r2_ridge <- cor(Y, pred_ridge)^2
# Ubah data ke matrix
X_matrix <- as.matrix(X)
Y_vector <- as.matrix(Y)
# =========================
# Cross Validation (Ridge)
# =========================
set.seed(123)
ridge_cv <- cv.glmnet(X_matrix, Y, alpha = 0)
best_lambda <- ridge_cv$lambda.min
ridge_model <- glmnet(X_matrix, Y,
alpha = 0,
lambda = best_lambda)
coef(ridge_model)
## 13 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) -9.997004e+00
## X1 2.217004e-01
## X2 1.045301e-06
## X3 2.208897e-01
## X4 -3.786014e-01
## X5 -6.647096e-02
## X6 1.365443e-01
## X7 -5.331720e-02
## X8 1.195750e-01
## X9 -1.544019e-02
## X10 -9.381168e-02
## X11 9.644182e-06
## X12 3.526636e+01
pred_ridge <- predict(ridge_model,
s = best_lambda,
newx = X_matrix)
mse_ridge <- mean((Y - pred_ridge)^2)
r2_ridge <- cor(Y, pred_ridge)^2
# Visualisasi Ridge
plot(ridge_cv)
plot(ridge_model, xvar = "lambda", label = TRUE)
model_pls <- plsr(Y ~ ., data = X,
scale = TRUE, validation = "CV")
summary(model_pls)
## Data: X dimension: 40 12
## Y dimension: 40 1
## Fit method: kernelpls
## Number of components considered: 12
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps
## CV 7.003 4.803 4.538 5.344 5.916 5.914 5.388
## adjCV 7.003 4.648 4.472 5.202 5.746 5.673 5.179
## 7 comps 8 comps 9 comps 10 comps 11 comps 12 comps
## CV 4.616 4.585 4.321 4.287 4.176 4.167
## adjCV 4.465 4.437 4.193 4.159 4.057 4.049
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps 4 comps 5 comps 6 comps 7 comps 8 comps
## X 21.27 56.64 65.76 80.63 83.54 88.58 93.16 96.38
## Y 69.13 76.46 79.61 81.30 87.22 88.18 89.04 89.42
## 9 comps 10 comps 11 comps 12 comps
## X 97.77 98.08 99.68 100.00
## Y 89.61 89.78 89.78 89.78
validationplot(model_pls, val.type = "MSEP")
# Misal 3 komponen
model_pls_final <- plsr(Y ~ ., data = X,
scale = TRUE, ncomp = 12)
pred_pls <- predict(model_pls_final, X, ncomp = 12)
mse_pls <- mean((Y - pred_pls)^2)
r2_pls <- cor(Y, pred_pls)^2
loadings(model_pls)
##
## Loadings:
## Comp 1 Comp 2 Comp 3 Comp 4 Comp 5 Comp 6 Comp 7 Comp 8 Comp 9 Comp 10
## X1 -0.124 0.304 0.609 -0.337 -0.632 0.735 -0.531
## X2 0.278 -0.670 0.753 -0.423 0.109 -0.151 -0.247 0.291
## X3 0.290 0.479 -0.156 -0.573 0.780 -0.652 0.130
## X4 -0.330 0.397 -0.141 0.228 -0.357 0.161 0.568 0.106
## X5 -0.401 0.386 -0.133 0.205 -0.224 -0.492
## X6 0.396 0.286 0.131 -0.560 0.248 -0.211 -0.252 0.185 0.346 -0.189
## X7 -0.387 0.373 -0.181 0.385 -0.113 -0.224 -0.187 0.274 0.155
## X8 0.294 0.186 -0.341 -0.797 0.544 0.218 -0.332 0.187
## X9 -0.335 0.378 -0.118 0.106 0.371 -0.118 -0.512 0.626
## X10 -0.415 0.341 -0.356 -0.234 -0.112 0.309 -0.117 -0.402
## X11 0.326 -0.663 0.678 -0.271 0.424 -0.190 0.197 -0.265
## X12 0.204 0.263 0.227 0.310 -0.348 1.193 -0.604 -0.193
## Comp 11 Comp 12
## X1 0.137 -0.221
## X2 0.135
## X3 0.101
## X4 0.201
## X5 0.212 0.619
## X6 -0.412
## X7 0.204 -0.517
## X8 0.415
## X9 -0.694 0.260
## X10 -0.391
## X11 -0.206
## X12 0.262
##
## Comp 1 Comp 2 Comp 3 Comp 4 Comp 5 Comp 6 Comp 7 Comp 8 Comp 9
## SS loadings 1.196 1.079 1.513 2.583 1.121 2.578 1.344 1.205 1.231
## Proportion Var 0.100 0.090 0.126 0.215 0.093 0.215 0.112 0.100 0.103
## Cumulative Var 0.100 0.190 0.316 0.531 0.624 0.839 0.951 1.052 1.154
## Comp 10 Comp 11 Comp 12
## SS loadings 1.114 1.058 1.000
## Proportion Var 0.093 0.088 0.083
## Cumulative Var 1.247 1.335 1.418
coef(model_pls, ncomp = 12, intercept = TRUE)
## , , 12 comps
##
## Y
## (Intercept) -57.3929215
## X1 -0.1543220
## X2 -5.1385050
## X3 1.0997331
## X4 1.3712985
## X5 -1.0958471
## X6 -0.9294381
## X7 1.7769425
## X8 3.4151282
## X9 1.5344308
## X10 -6.8001704
## X11 6.2880235
## X12 4.6555563
msep_pls <- MSEP(model_pls)
msep_pls$val
## , , model = (Intercept)
##
## response
## estimate Y
## CV 49.04844
## adjCV 49.04844
##
## , , model = 1 comps
##
## response
## estimate Y
## CV 23.06440
## adjCV 21.60415
##
## , , model = 2 comps
##
## response
## estimate Y
## CV 20.59609
## adjCV 19.99643
##
## , , model = 3 comps
##
## response
## estimate Y
## CV 28.55404
## adjCV 27.06026
##
## , , model = 4 comps
##
## response
## estimate Y
## CV 35.00181
## adjCV 33.01325
##
## , , model = 5 comps
##
## response
## estimate Y
## CV 34.97018
## adjCV 32.18528
##
## , , model = 6 comps
##
## response
## estimate Y
## CV 29.02780
## adjCV 26.82373
##
## , , model = 7 comps
##
## response
## estimate Y
## CV 21.30965
## adjCV 19.93519
##
## , , model = 8 comps
##
## response
## estimate Y
## CV 21.01826
## adjCV 19.68885
##
## , , model = 9 comps
##
## response
## estimate Y
## CV 18.66990
## adjCV 17.58068
##
## , , model = 10 comps
##
## response
## estimate Y
## CV 18.37516
## adjCV 17.29437
##
## , , model = 11 comps
##
## response
## estimate Y
## CV 17.43948
## adjCV 16.46162
##
## , , model = 12 comps
##
## response
## estimate Y
## CV 17.36381
## adjCV 16.39420
opt_comp_pls <- which.min(msep_pls$val[1,1,])
opt_comp_pls
## 12 comps
## 13
hasil <- data.frame(
Model = c("PCR", "Ridge", "PLS"),
MSE = c(mse_pcr, mse_ridge, mse_pls),
R2 = c(r2_pcr, r2_ridge, r2_pls)
)
hasil
## Model MSE R2
## 1 PCR 4.763853 0.8978299
## 2 Ridge 10.352875 0.8053681
## 3 PLS 4.763853 0.8978299
# Urutkan model terbaik
hasil[order(-hasil$R2, hasil$MSE), ]
## Model MSE R2
## 3 PLS 4.763853 0.8978299
## 1 PCR 4.763853 0.8978299
## 2 Ridge 10.352875 0.8053681
summary(model_ols)
##
## Call:
## lm(formula = Y ~ ., data = data_model)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.8670 -1.2276 -0.2296 1.0510 4.8260
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.739e+01 1.878e+01 -3.056 0.00500 **
## X1 -1.041e-01 4.973e-01 -0.209 0.83572
## X2 -3.175e-05 8.800e-06 -3.609 0.00123 **
## X3 1.781e-01 9.813e-02 1.815 0.08064 .
## X4 9.647e-01 1.352e+00 0.714 0.48148
## X5 -1.582e-01 2.777e-01 -0.570 0.57349
## X6 -7.068e-02 8.211e-02 -0.861 0.39692
## X7 3.988e-01 3.267e-01 1.221 0.23282
## X8 3.709e-01 1.254e-01 2.959 0.00636 **
## X9 1.065e-01 7.032e-02 1.515 0.14146
## X10 -3.741e-01 1.039e-01 -3.602 0.00126 **
## X11 3.866e-05 8.080e-06 4.785 5.43e-05 ***
## X12 8.460e+01 1.343e+01 6.297 9.70e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.657 on 27 degrees of freedom
## Multiple R-squared: 0.8978, Adjusted R-squared: 0.8524
## F-statistic: 19.77 on 12 and 27 DF, p-value: 2.566e-10
# Variabel signifikan
coef_table <- summary(model_ols)$coefficients
signif_var <- coef_table[coef_table[,4] < 0.05, ]
signif_var
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.739292e+01 1.877886e+01 -3.056252 5.003315e-03
## X2 -3.175484e-05 8.799832e-06 -3.608574 1.234247e-03
## X8 3.708732e-01 1.253533e-01 2.958623 6.357532e-03
## X10 -3.741162e-01 1.038701e-01 -3.601770 1.256191e-03
## X11 3.866362e-05 8.080394e-06 4.784868 5.430039e-05
## X12 8.459702e+01 1.343398e+01 6.297243 9.704124e-07