Library

# memuat library yang dibutuhkan
library(caret)

## Loading required package: ggplot2

## Loading required package: lattice

library(ggplot2)
library(corrplot)

## corrplot 0.92 loaded

library(cowplot)
library(glmnet)

## Loading required package: Matrix

## Loaded glmnet 4.1-8

library(pROC)

## Type 'citation("pROC")' for a citation.

## 
## Attaching package: 'pROC'

## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var

Eksplorasi Data

# Read the "Gabungan" sheet of the Excel workbook into `data`
# NOTE(review): absolute, user-specific path; the script only runs on a
# machine where this exact file exists.
library(readxl)
data <- read_xlsx(
  path = "C:/Users/HAKIM/Downloads/Data TPM.xlsx",
  sheet = "Gabungan"
)

# Inspect the structure of the data (dimensions, column types, leading values)
str(data)

## tibble [119 × 6] (S3: tbl_df/tbl/data.frame)
##  $ Y : num [1:119] 13.67 9.82 11.17 9.18 9.72 ...
##  $ X1: num [1:119] 97.1 98.4 97.4 97 95.6 ...
##  $ X2: num [1:119] 2.28 1.84 1.91 2.18 2.21 2.11 2.1 1.71 2.02 1.9 ...
##  $ X3: num [1:119] 74.2 85.4 82.8 82.2 82.3 ...
##  $ X4: num [1:119] 14.87 3.43 4.16 4.51 4.25 ...
##  $ X5: num [1:119] 98.7 99.2 99 99.7 98.7 ...

Variabel	Keterangan
Y	Angka Kematian Bayi — jumlah kematian bayi per 1.000 kelahiran hidup dalam satu tahun di suatu wilayah.
X1	Angka Harapan Hidup — rata-rata perkiraan lama hidup seseorang sejak lahir.
X2	Total Fertility Rate — rata-rata jumlah anak yang dilahirkan oleh seorang wanita selama masa suburnya.
X3	Indeks Pembangunan Manusia — indeks komposit yang mencerminkan kualitas hidup manusia melalui pendidikan, kesehatan, dan pendapatan.
X4	Persentase Kemiskinan — persentase penduduk yang berada di bawah garis kemiskinan di suatu wilayah.
X5	Angka Melek Huruf Ibu — persentase ibu yang dapat membaca dan menulis di suatu wilayah.

# Print summary statistics for every column
summary(data)

##        Y               X1              X2              X3       
##  Min.   : 9.18   Min.   :77.60   Min.   :1.540   Min.   :64.75  
##  1st Qu.:11.71   1st Qu.:93.01   1st Qu.:1.915   1st Qu.:69.61  
##  Median :13.24   Median :96.07   Median :2.050   Median :72.48  
##  Mean   :13.39   Mean   :95.04   Mean   :2.034   Mean   :73.54  
##  3rd Qu.:14.71   3rd Qu.:98.58   3rd Qu.:2.160   3rd Qu.:76.53  
##  Max.   :19.66   Max.   :99.95   Max.   :2.450   Max.   :86.93  
##        X4               X5       
##  Min.   : 2.290   Min.   :77.60  
##  1st Qu.: 7.170   1st Qu.:93.01  
##  Median : 9.940   Median :96.07  
##  Mean   : 9.927   Mean   :95.04  
##  3rd Qu.:12.430   3rd Qu.:98.58  
##  Max.   :22.780   Max.   :99.95

# Count the missing (NA) values in each column
colSums(is.na(data))

##  Y X1 X2 X3 X4 X5 
##  0  0  0  0  0  0

# Pairwise correlations between all columns, rounded to two decimals
corr_matrix <- round(cor(data), digits = 2)

# Lower-triangle colour map of the correlations, annotated with the
# coefficients and drawn without the diagonal
corrplot(
  corr_matrix,
  method = "color",
  type = "lower",
  diag = FALSE,
  addCoef.col = "#2F2F2F",
  addCoefasPercent = FALSE,
  number.cex = 0.5,
  tl.col = "black",
  tl.cex = 0.5
)

Pembagian Data

# Fix the random seed so the partition is reproducible
set.seed(39)

# Share of rows assigned to the training set (the remainder is the test set)
prop_train <- 0.8

# Row indices of the training set, sampled by caret on the response Y
index <- createDataPartition(y = data[["Y"]], p = prop_train, list = FALSE)

# Split the data set: selected rows for training, all other rows for testing
testing_data <- data[-index, ]
training_data <- data[index, ]

Pemodelan

# Fit a linear regression of Y on all remaining columns of the training set
model_reg <- lm(formula = Y ~ ., data = training_data)

# Variance inflation factor of each predictor in the fitted model
car::vif(model_reg)

##       X1       X2       X3       X4       X5 
## 1.241477 1.027043 1.798745 1.929327 1.322506

# Print the coefficient table and fit statistics of the full model
summary(model_reg)

## 
## Call:
## lm(formula = Y ~ ., data = training_data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2134 -0.9348 -0.0071  0.6620  5.1071 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 27.47614    5.86404   4.686 9.86e-06 ***
## X1          -0.03783    0.03629  -1.043   0.3000    
## X2           1.00793    0.87529   1.152   0.2526    
## X3          -0.26834    0.03865  -6.943 5.78e-10 ***
## X4           0.09469    0.05273   1.796   0.0759 .  
## X5           0.06521    0.04741   1.375   0.1724    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.434 on 90 degrees of freedom
## Multiple R-squared:  0.5829, Adjusted R-squared:  0.5597 
## F-statistic: 25.16 on 5 and 90 DF,  p-value: 8.776e-16

Deteksi Pencilan dan outlier

# Outlier screening based on standardized residuals
sisaan <- residuals(model_reg)
stdr.sisaan <- rstandard(model_reg)
mut.stdr.sisaan <- abs(stdr.sisaan)

# One-column table of |standardized residual|, sorted ascending so the
# largest values are printed last
pencilll <- data.frame(sort(mut.stdr.sisaan))

# Second column flags observations whose |standardized residual| exceeds 2
pencilan <- data.frame(pencilll, pencilll > 2)
pencilan

##    sort.mut.stdr.sisaan. sort.mut.stdr.sisaan..1
## 52          2.934199e-05                   FALSE
## 29          1.020845e-02                   FALSE
## 33          2.387866e-02                   FALSE
## 4           2.692892e-02                   FALSE
## 49          2.706920e-02                   FALSE
## 45          5.058162e-02                   FALSE
## 10          5.547483e-02                   FALSE
## 9           6.777830e-02                   FALSE
## 57          7.428544e-02                   FALSE
## 59          9.271648e-02                   FALSE
## 22          9.432184e-02                   FALSE
## 21          1.071630e-01                   FALSE
## 95          1.344607e-01                   FALSE
## 88          1.483334e-01                   FALSE
## 7           1.670029e-01                   FALSE
## 14          1.861112e-01                   FALSE
## 6           1.878320e-01                   FALSE
## 43          1.922635e-01                   FALSE
## 25          2.019830e-01                   FALSE
## 74          2.128772e-01                   FALSE
## 68          2.308358e-01                   FALSE
## 34          2.356360e-01                   FALSE
## 96          2.415794e-01                   FALSE
## 77          2.420103e-01                   FALSE
## 2           2.443759e-01                   FALSE
## 30          2.591992e-01                   FALSE
## 1           2.675704e-01                   FALSE
## 20          2.834002e-01                   FALSE
## 26          2.852262e-01                   FALSE
## 89          2.922638e-01                   FALSE
## 83          3.048645e-01                   FALSE
## 48          3.075783e-01                   FALSE
## 23          3.217419e-01                   FALSE
## 24          3.251137e-01                   FALSE
## 11          3.360381e-01                   FALSE
## 5           3.503375e-01                   FALSE
## 16          4.124900e-01                   FALSE
## 61          4.525938e-01                   FALSE
## 60          4.772302e-01                   FALSE
## 55          4.956599e-01                   FALSE
## 42          4.971182e-01                   FALSE
## 71          5.033830e-01                   FALSE
## 18          5.114785e-01                   FALSE
## 66          5.376247e-01                   FALSE
## 76          5.474967e-01                   FALSE
## 13          5.491789e-01                   FALSE
## 87          5.555210e-01                   FALSE
## 62          5.727714e-01                   FALSE
## 15          5.745755e-01                   FALSE
## 17          5.827126e-01                   FALSE
## 56          5.849224e-01                   FALSE
## 86          6.663865e-01                   FALSE
## 81          6.753652e-01                   FALSE
## 92          6.783020e-01                   FALSE
## 12          6.848629e-01                   FALSE
## 47          6.850604e-01                   FALSE
## 28          6.908164e-01                   FALSE
## 51          7.052784e-01                   FALSE
## 39          7.248910e-01                   FALSE
## 58          7.579051e-01                   FALSE
## 8           7.861058e-01                   FALSE
## 27          8.122294e-01                   FALSE
## 73          8.184426e-01                   FALSE
## 75          8.370506e-01                   FALSE
## 54          8.657657e-01                   FALSE
## 19          8.658475e-01                   FALSE
## 41          8.878073e-01                   FALSE
## 63          9.109013e-01                   FALSE
## 72          9.562364e-01                   FALSE
## 53          9.873582e-01                   FALSE
## 85          1.005506e+00                   FALSE
## 31          1.020415e+00                   FALSE
## 37          1.025272e+00                   FALSE
## 40          1.076151e+00                   FALSE
## 36          1.100870e+00                   FALSE
## 65          1.116616e+00                   FALSE
## 82          1.162571e+00                   FALSE
## 69          1.210410e+00                   FALSE
## 3           1.213325e+00                   FALSE
## 80          1.231850e+00                   FALSE
## 46          1.278879e+00                   FALSE
## 67          1.342223e+00                   FALSE
## 64          1.431922e+00                   FALSE
## 94          1.446401e+00                   FALSE
## 44          1.453913e+00                   FALSE
## 35          1.610470e+00                   FALSE
## 93          1.624643e+00                   FALSE
## 50          1.973746e+00                   FALSE
## 32          2.031145e+00                    TRUE
## 79          2.069946e+00                    TRUE
## 90          2.144115e+00                    TRUE
## 84          2.159584e+00                    TRUE
## 91          2.235606e+00                    TRUE
## 38          2.260498e+00                    TRUE
## 78          2.359825e+00                    TRUE
## 70          3.716813e+00                    TRUE

# Leverage points: an observation is flagged when its hat value exceeds the
# 2p/n cut-off, where p is the number of estimated coefficients (intercept
# included) and n is the number of observations used in the fit.
# Both are taken from the fitted model so the cut-off always matches it,
# instead of being hard-coded for a different data set.
hi <- hatvalues(model_reg)
ambang_batas <- 2 * length(coef(model_reg)) / nobs(model_reg)

# Table of hat values with a TRUE/FALSE flag for high-leverage observations
leveragee <- data.frame(hi, hi > ambang_batas)
leveragee

##            hi hi...ambang_batas
## 1  0.08415773             FALSE
## 2  0.08289759             FALSE
## 3  0.05785058             FALSE
## 4  0.04871496             FALSE
## 5  0.09778723             FALSE
## 6  0.04767867             FALSE
## 7  0.03433197             FALSE
## 8  0.10253424             FALSE
## 9  0.06031376             FALSE
## 10 0.07324402             FALSE
## 11 0.05882564             FALSE
## 12 0.03492227             FALSE
## 13 0.09876190             FALSE
## 14 0.06009103             FALSE
## 15 0.04697435             FALSE
## 16 0.04703569             FALSE
## 17 0.04562092             FALSE
## 18 0.04406433             FALSE
## 19 0.03827881             FALSE
## 20 0.03081689             FALSE
## 21 0.08799578             FALSE
## 22 0.01645603             FALSE
## 23 0.05837341             FALSE
## 24 0.07978175             FALSE
## 25 0.04753211             FALSE
## 26 0.05535826             FALSE
## 27 0.04654154             FALSE
## 28 0.04282208             FALSE
## 29 0.04475618             FALSE
## 30 0.04923337             FALSE
## 31 0.11550119             FALSE
## 32 0.03780361             FALSE
## 33 0.09466140             FALSE
## 34 0.04166758             FALSE
## 35 0.03676084             FALSE
## 36 0.04574993             FALSE
## 37 0.03972319             FALSE
## 38 0.01738378             FALSE
## 39 0.02382388             FALSE
## 40 0.03098018             FALSE
## 41 0.05670686             FALSE
## 42 0.05595835             FALSE
## 43 0.04171766             FALSE
## 44 0.13475185             FALSE
## 45 0.10390278             FALSE
## 46 0.07237469             FALSE
## 47 0.09068451             FALSE
## 48 0.10740095             FALSE
## 49 0.02719486             FALSE
## 50 0.04832489             FALSE
## 51 0.03001109             FALSE
## 52 0.04567590             FALSE
## 53 0.06132746             FALSE
## 54 0.08511344             FALSE
## 55 0.05773588             FALSE
## 56 0.06003468             FALSE
## 57 0.08287526             FALSE
## 58 0.21481991             FALSE
## 59 0.11018171             FALSE
## 60 0.09296809             FALSE
## 61 0.11174571             FALSE
## 62 0.03849529             FALSE
## 63 0.02600452             FALSE
## 64 0.03651257             FALSE
## 65 0.02651001             FALSE
## 66 0.02494046             FALSE
## 67 0.02412986             FALSE
## 68 0.05460983             FALSE
## 69 0.04416452             FALSE
## 70 0.08195865             FALSE
## 71 0.01945827             FALSE
## 72 0.06327741             FALSE
## 73 0.03235638             FALSE
## 74 0.06116080             FALSE
## 75 0.03440819             FALSE
## 76 0.02127041             FALSE
## 77 0.04650627             FALSE
## 78 0.06808932             FALSE
## 79 0.09654514             FALSE
## 80 0.15873875             FALSE
## 81 0.10010824             FALSE
## 82 0.18317253             FALSE
## 83 0.06243881             FALSE
## 84 0.03429651             FALSE
## 85 0.03883731             FALSE
## 86 0.04450664             FALSE
## 87 0.05842917             FALSE
## 88 0.04715074             FALSE
## 89 0.05444424             FALSE
## 90 0.05147024             FALSE
## 91 0.06951863             FALSE
## 92 0.04341517             FALSE
## 93 0.11356305             FALSE
## 94 0.12434776             FALSE
## 95 0.04911384             FALSE
## 96 0.06673529             FALSE

# Plot titik pencilan dan leverage
library(olsrr)

## 
## Attaching package: 'olsrr'

## The following object is masked from 'package:datasets':
## 
##     rivers

# Residual-versus-leverage diagnostic plot (olsrr) for the full model
ols_plot_resid_lev(model_reg)

# Cook's distance of every training observation
di <- cooks.distance(model_reg)

# Critical value: upper-tail F quantile with df1 = p (number of estimated
# coefficients) and df2 = n - p (residual degrees of freedom), both read from
# the fitted model. lower.tail = FALSE means 0.05 is the upper-tail
# probability (alpha).
# NOTE(review): many textbooks compare Cook's distance with the median,
# qf(0.5, p, n - p); the 0.05 upper-tail level is kept from the original.
f <- qf(0.05,
        df1 = length(coef(model_reg)),
        df2 = df.residual(model_reg),
        lower.tail = FALSE)

# Table of distances with a flag for values above the critical value
data.frame(di, di > f)

##              di di...f
## 1  1.096474e-03  FALSE
## 2  8.996828e-04  FALSE
## 3  1.506576e-02  FALSE
## 4  6.189255e-06  FALSE
## 5  2.217150e-03  FALSE
## 6  2.943936e-04  FALSE
## 7  1.652601e-04  FALSE
## 8  1.176689e-02  FALSE
## 9  4.914322e-05  FALSE
## 10 4.053662e-05  FALSE
## 11 1.176312e-03  FALSE
## 12 2.828760e-03  FALSE
## 13 5.508411e-03  FALSE
## 14 3.690775e-04  FALSE
## 15 2.712059e-03  FALSE
## 16 1.399672e-03  FALSE
## 17 2.705209e-03  FALSE
## 18 2.009842e-03  FALSE
## 19 4.973256e-03  FALSE
## 20 4.256298e-04  FALSE
## 21 1.846730e-04  FALSE
## 22 2.480873e-05  FALSE
## 23 1.069548e-03  FALSE
## 24 1.527327e-03  FALSE
## 25 3.393244e-04  FALSE
## 26 7.945896e-04  FALSE
## 27 5.367168e-03  FALSE
## 28 3.558354e-03  FALSE
## 29 8.137799e-07  FALSE
## 30 5.798313e-04  FALSE
## 31 2.266167e-02  FALSE
## 32 2.701470e-02  FALSE
## 33 9.936432e-06  FALSE
## 34 4.023593e-04  FALSE
## 35 1.649700e-02  FALSE
## 36 9.683868e-03  FALSE
## 37 7.247275e-03  FALSE
## 38 1.506667e-02  FALSE
## 39 2.137364e-03  FALSE
## 40 6.170875e-03  FALSE
## 41 7.897235e-03  FALSE
## 42 2.441416e-03  FALSE
## 43 2.682064e-04  FALSE
## 44 5.486808e-02  FALSE
## 45 4.944317e-05  FALSE
## 46 2.126776e-02  FALSE
## 47 7.800545e-03  FALSE
## 48 1.897194e-03  FALSE
## 49 3.413978e-06  FALSE
## 50 3.296958e-02  FALSE
## 51 2.564985e-03  FALSE
## 52 6.867823e-12  FALSE
## 53 1.061547e-02  FALSE
## 54 1.162199e-02  FALSE
## 55 2.508936e-03  FALSE
## 56 3.641963e-03  FALSE
## 57 8.310988e-05  FALSE
## 58 2.619290e-02  FALSE
## 59 1.774070e-04  FALSE
## 60 3.890594e-03  FALSE
## 61 4.294964e-03  FALSE
## 62 2.189110e-03  FALSE
## 63 3.692184e-03  FALSE
## 64 1.295041e-02  FALSE
## 65 5.658941e-03  FALSE
## 66 1.232198e-03  FALSE
## 67 7.424390e-03  FALSE
## 68 5.129971e-04  FALSE
## 69 1.128247e-02  FALSE
## 70 2.055524e-01  FALSE
## 71 8.380771e-04  FALSE
## 72 1.029478e-02  FALSE
## 73 3.733101e-03  FALSE
## 74 4.920269e-04  FALSE
## 75 4.161217e-03  FALSE
## 76 1.085738e-03  FALSE
## 77 4.761130e-04  FALSE
## 78 6.781302e-02  FALSE
## 79 7.631161e-02  FALSE
## 80 4.772175e-02  FALSE
## 81 8.456792e-03  FALSE
## 82 5.051465e-02  FALSE
## 83 1.031615e-03  FALSE
## 84 2.760546e-02  FALSE
## 85 6.808796e-03  FALSE
## 86 3.447453e-03  FALSE
## 87 3.191732e-03  FALSE
## 88 1.814642e-04  FALSE
## 89 8.197161e-04  FALSE
## 90 4.157672e-02  FALSE
## 91 6.223471e-02  FALSE
## 92 3.480270e-03  FALSE
## 93 5.635776e-02  FALSE
## 94 4.951449e-02  FALSE
## 95 1.556377e-04  FALSE
## 96 6.955356e-04  FALSE

# Cook's distance plot with the critical value `f` as a dashed reference line
library(ggplot2)
cooks_crit <- f
model_cooks <- cooks.distance(model_reg)

# Use a numeric observation index for the x axis. Character names would be
# treated as a discrete scale and ordered lexicographically
# ("1", "10", "11", ..., "2"), scrambling the observation order.
df <- data.frame(obs = seq_along(model_cooks),
                 cooks = model_cooks)
ggplot(df, aes(x = obs, y = cooks)) +
  geom_point() +
  geom_hline(yintercept = cooks_crit, linetype = "dashed") +
  labs(title = "Cook's Distance",
       subtitle = "Influential Observation ",
       x = "Observation Number",
       y = "Cook's")

Berdasarkan jarak Cook, tidak terdapat amatan berpengaruh (tidak ada nilai jarak Cook yang melebihi nilai kritis).

# Detect influential observations (DFBETAS plot from olsrr)
ols_plot_dfbetas(model_reg)

amatan yang berada di luar batas garis merah adalah amatan berpengaruh

# Detect influential observations (DFFITS plot from olsrr)
ols_plot_dffits(model_reg)

Amatan 44, 70, 78, 79, 80, 82, 90, 91, 93, dan 94 merupakan amatan berpengaruh (amatan yang berada di luar batas garis merah adalah amatan berpengaruh).

Pemodelan Regresi Sementara

Amatan 38 dihapuskan karena merupakan titik leverage yang bukan amatan berpengaruh. Setelah mencoba berbagai kombinasi, kombinasi terbaik diperoleh dengan menghapus amatan 38, 58, dan 80.

#model_reg <- lm(Y ~ ., data = training_data[-c(84,90,78,91,70,79,50,44,82),])

#summary(model_reg)

Uji Asumsi

# Gauss-Markov assumptions
# 1. Expected value of the residuals equals zero
#    One-sample t-test with H0: mean residual = 0; the assumption is judged
#    satisfied when H0 is not rejected.
#    Note: residuals of a least-squares fit that includes an intercept
#    average to zero by construction, so this test cannot reject.
t.test(model_reg$residuals, mu = 0, conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  model_reg$residuals
## t = 1.6411e-16, df = 95, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2828185  0.2828185
## sample estimates:
##    mean of x 
## 2.337896e-17

terpenuhi

# 2 Sisaan saling bebas
library(lmtest)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

# Runs test on the residuals of the full model (alternative: nonrandomness)
library(randtests)
runs.test(model_reg$residuals) # independence is judged satisfied when p-value > 0.05

## 
##  Runs Test
## 
## data:  model_reg$residuals
## statistic = -1.8469, runs = 40, n1 = 48, n2 = 48, n = 96, p-value =
## 0.06477
## alternative hypothesis: nonrandomness

terpenuhi

# 3. Homogeneous residual variance: Breusch-Pagan test on the full model
library(lmtest)
bptest(model_reg)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_reg
## BP = 10.402, df = 5, p-value = 0.06461

terpenuhi

# Residual normality assumption: Shapiro-Wilk test on the full-model residuals
shapiro.test(model_reg$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  model_reg$residuals
## W = 0.96836, p-value = 0.02019

# Second normality check on the same residuals: Anderson-Darling test
nortest::ad.test(model_reg$residuals)

## 
##  Anderson-Darling normality test
## 
## data:  model_reg$residuals
## A = 0.78116, p-value = 0.04116

Asumsi normalitas tidak terpenuhi pada taraf nyata 5% karena p-value kedua uji (0.02019 dan 0.04116) lebih kecil dari 0.05.

# Multicollinearity: variance inflation factor of each predictor
car::vif(model_reg)

##       X1       X2       X3       X4       X5 
## 1.241477 1.027043 1.798745 1.929327 1.322506

VIF<10, tidak ada multikolinearitas

Regresi Stepwise

# Stepwise selection by AIC starting from the full model; direction = "both"
# allows terms to be dropped and re-added at each step
model_stepwise <- MASS::stepAIC(model_reg, direction="both")

## Start:  AIC=75.02
## Y ~ X1 + X2 + X3 + X4 + X5
## 
##        Df Sum of Sq    RSS     AIC
## - X1    1     2.235 187.32  74.175
## - X2    1     2.727 187.82  74.427
## - X5    1     3.891 188.98  75.020
## <none>              185.09  75.023
## - X4    1     6.633 191.72  76.403
## - X3    1    99.126 284.21 114.196
## 
## Step:  AIC=74.18
## Y ~ X2 + X3 + X4 + X5
## 
##        Df Sum of Sq    RSS     AIC
## - X5    1     2.196 189.52  73.294
## - X2    1     3.377 190.70  73.890
## <none>              187.32  74.175
## + X1    1     2.235 185.09  75.023
## - X4    1     6.269 193.59  75.335
## - X3    1   101.253 288.58 113.659
## 
## Step:  AIC=73.29
## Y ~ X2 + X3 + X4
## 
##        Df Sum of Sq    RSS     AIC
## - X2    1     3.578 193.10  73.089
## <none>              189.52  73.294
## - X4    1     4.836 194.35  73.713
## + X5    1     2.196 187.32  74.175
## + X1    1     0.540 188.98  75.020
## - X3    1   103.765 293.29 113.212
## 
## Step:  AIC=73.09
## Y ~ X3 + X4
## 
##        Df Sum of Sq    RSS     AIC
## <none>              193.10  73.089
## + X2    1     3.578 189.52  73.294
## + X5    1     2.396 190.70  73.890
## - X4    1     5.740 198.84  73.901
## + X1    1     0.817 192.28  74.682
## - X3    1   102.701 295.80 112.032

# Store the summary of the stepwise-selected model (assigned, not printed)
best_model <- summary(model_stepwise)

Uji Asumsi

# Gauss-Markov assumptions for the stepwise-selected model
# 1. Expected value of the residuals equals zero
#    One-sample t-test with H0: mean residual = 0; the assumption is judged
#    satisfied when H0 is not rejected.
#    Note: residuals of a least-squares fit that includes an intercept
#    average to zero by construction, so this test cannot reject.
t.test(model_stepwise$residuals, mu = 0, conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  model_stepwise$residuals
## t = 4.471e-16, df = 95, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2888724  0.2888724
## sample estimates:
##    mean of x 
## 6.505665e-17

terpenuhi

# 2. Independent residuals: runs test on the stepwise-model residuals
#    (alternative hypothesis: nonrandomness)
library(lmtest)
library(randtests)
runs.test(model_stepwise$residuals) # satisfied only when p-value > 0.05; NOTE(review): the rendered output reports p = 0.04016

## 
##  Runs Test
## 
## data:  model_stepwise$residuals
## statistic = -2.0521, runs = 39, n1 = 48, n2 = 48, n = 96, p-value =
## 0.04016
## alternative hypothesis: nonrandomness

Tidak terpenuhi pada taraf nyata 5% (p-value = 0.04016 < 0.05).

# 3. Homogeneous residual variance: Breusch-Pagan test on the stepwise model
library(lmtest)
bptest(model_stepwise)

## 
##  studentized Breusch-Pagan test
## 
## data:  model_stepwise
## BP = 6.0411, df = 2, p-value = 0.04878

Tidak terpenuhi pada taraf nyata 5% (p-value = 0.04878 < 0.05).

# Residual normality assumption: Shapiro-Wilk test on the stepwise-model residuals
shapiro.test(model_stepwise$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  model_stepwise$residuals
## W = 0.97495, p-value = 0.06264

# Second normality check on the same residuals: Anderson-Darling test
nortest::ad.test(model_stepwise$residuals)

## 
##  Anderson-Darling normality test
## 
## data:  model_stepwise$residuals
## A = 0.72783, p-value = 0.05586

asumsi normalitas terpenuhi

# Multicollinearity: variance inflation factor of each remaining predictor
car::vif(model_stepwise)

##       X3       X4 
## 1.782551 1.782551

VIF<10, tidak ada multikolinearitas

Evaluasi Model

# Predictions of the stepwise-selected model on the held-out test set
preds_reg <- predict(model_stepwise, newdata = testing_data)

# Error metrics from the Metrics package, called via its namespace
# (run install.packages("Metrics") first if it is not installed)
MAE <- Metrics::mae(actual = testing_data$Y, predicted = preds_reg)
MSE <- Metrics::mse(actual = testing_data$Y, predicted = preds_reg)
RMSE <- Metrics::rmse(actual = testing_data$Y, predicted = preds_reg)
# mape() result is multiplied by 100 to express it as a percentage
MAPE <- 100 * Metrics::mape(actual = testing_data$Y, predicted = preds_reg)

print(paste("MAE:", MAE))

## [1] "MAE: 1.51049841843366"

# Report the mean squared error stored in MSE
print(paste("MSE:", MSE))

## [1] "MSE: 4.06428413333408"

# Report the root mean squared error stored in RMSE
print(paste("RMSE:", RMSE))

## [1] "RMSE: 2.01600697750134"

# Report the mean absolute percentage error stored in MAPE
print(paste("MAPE:", MAPE))

## [1] "MAPE: 10.7251890322102"

# R-squared on the test set: 1 - (residual sum of squares / total sum of squares)
ss_res <- sum((preds_reg - testing_data$Y)^2)
ss_tot <- sum((testing_data$Y - mean(testing_data$Y))^2)
r_squared <- 1 - ss_res / ss_tot
print(paste("R-squared:", r_squared))

## [1] "R-squared: 0.448767949592585"

Analisis Regresi - Teknik Pembelajaran Mesin

Hakim Zoelva Mahesa - G1401211039

2024-05-31

Library

Eksplorasi Data

Pembagian Data

Pemodelan

Deteksi Pencilan dan outlier

Pemodelan Regresi Sementara

Uji Asumsi

Regresi Stepwise

Uji Asumsi

Evaluasi Model