Tugas Akhir Analisis Regresi

Packages

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggridges)
library(treemap)
library(treemapify)
library(GGally)

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

library(plotly)

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

library(ggplot2)
library(readxl)
library(car)

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some

library(dplyr)
library(openxlsx)
library(randtests)
library(lmtest)

## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

library(olsrr)

## 
## Attaching package: 'olsrr'
## 
## The following object is masked from 'package:datasets':
## 
##     rivers

library(corrplot)

## corrplot 0.92 loaded

library(nortest)
library(MASS)

## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:olsrr':
## 
##     cement
## 
## The following object is masked from 'package:plotly':
## 
##     select
## 
## The following object is masked from 'package:dplyr':
## 
##     select

Data

data <- read_excel("/Users/user/Downloads/Documents/Anreg /Tugas Akhir/DATASET ANREG.xlsx")

## New names:
## • `` -> `...17`
## • `` -> `...18`
## • `` -> `...19`

Y<-data$indeks_kesehatan
X0<-rep(1,27)
X1<-data$minum
X2<-data$sanitasi
X3<-data$kurus
X4<-data$stunting
X5<-data$imunisasi
X6<-data$hipertensi
X7<-data$diare
X8<-data$nafas
X9<-data$rumah_sakit
X10<-data$dokter
X11<-data$posyandu
X12<-data$bersih
X13<-data$jaminan
X14<-data$bidan
data<-data.frame(cbind(Y,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,X13,X14))
data[] <- lapply(data, as.numeric)
head(data)

##       Y    X1     X2    X3    X4     X5     X6     X7     X8 X9 X10  X11    X12
## 1 79.46 92.28  68.60 16991 18666 103642  12185 166464 146427 30 596 3009 552261
## 2 79.29 85.70 100.00  5998 10231  36605 556178  68565  47222  9 173 3233 382327
## 3 77.82 80.79  92.89  3146  6871  46859  58089  63213  71337  5  90 2838 534689
## 4 83.09 93.79 100.00 10024 21018  61780 397464 102889  77467 15 247 4010 388891
## 5 79.77 78.15  93.47  8675 30266  41393 160510  71766  22451  7  98 3546 366250
## 6 76.85 89.35  89.74  5104 14122  28534   5211  48901  40549  2  32 2023 226236
##     X13  X14
## 1 63.99 1950
## 2 55.99 1516
## 3 48.16 1476
## 4 58.02  876
## 5 43.32 1460
## 6 41.21 1140

str(data)

## 'data.frame':    27 obs. of  15 variables:
##  $ Y  : num  79.5 79.3 77.8 83.1 79.8 ...
##  $ X1 : num  92.3 85.7 80.8 93.8 78.2 ...
##  $ X2 : num  68.6 100 92.9 100 93.5 ...
##  $ X3 : num  16991 5998 3146 10024 8675 ...
##  $ X4 : num  18666 10231 6871 21018 30266 ...
##  $ X5 : num  103642 36605 46859 61780 41393 ...
##  $ X6 : num  12185 556178 58089 397464 160510 ...
##  $ X7 : num  166464 68565 63213 102889 71766 ...
##  $ X8 : num  146427 47222 71337 77467 22451 ...
##  $ X9 : num  30 9 5 15 7 2 6 12 12 5 ...
##  $ X10: num  596 173 90 247 98 32 63 125 321 63 ...
##  $ X11: num  3009 3233 2838 4010 3546 ...
##  $ X12: num  552261 382327 534689 388891 366250 ...
##  $ X13: num  64 56 48.2 58 43.3 ...
##  $ X14: num  1950 1516 1476 876 1460 ...

Persamaan Awal Regresi

model <- lm(Y ~ X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+X12+X13+X14 ,data=data)
model

## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + 
##     X10 + X11 + X12 + X13 + X14, data = data)
## 
## Coefficients:
## (Intercept)           X1           X2           X3           X4           X5  
##   6.751e+01    1.310e-01    7.944e-03    2.929e-04   -2.873e-06    7.583e-05  
##          X6           X7           X8           X9          X10          X11  
##   1.834e-06   -8.960e-05   -2.063e-05    1.545e-01    9.005e-04    9.614e-04  
##         X12          X13          X14  
##   3.809e-06    2.342e-03   -2.941e-03

summary(model)

## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + 
##     X10 + X11 + X12 + X13 + X14, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4303 -0.4708 -0.1421  0.6426  2.0986 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.751e+01  1.013e+01   6.665 2.31e-05 ***
## X1           1.310e-01  8.994e-02   1.456   0.1710    
## X2           7.944e-03  2.756e-02   0.288   0.7781    
## X3           2.929e-04  3.510e-04   0.834   0.4203    
## X4          -2.873e-06  1.195e-04  -0.024   0.9812    
## X5           7.583e-05  1.649e-04   0.460   0.6538    
## X6           1.834e-06  4.292e-06   0.427   0.6767    
## X7          -8.960e-05  9.133e-05  -0.981   0.3460    
## X8          -2.063e-05  4.727e-05  -0.436   0.6703    
## X9           1.545e-01  8.501e-02   1.817   0.0942 .  
## X10          9.005e-04  1.539e-03   0.585   0.5693    
## X11          9.614e-04  1.139e-03   0.844   0.4150    
## X12          3.809e-06  4.111e-06   0.926   0.3725    
## X13          2.342e-03  3.684e-02   0.064   0.9503    
## X14         -2.941e-03  1.364e-03  -2.157   0.0520 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.475 on 12 degrees of freedom
## Multiple R-squared:  0.7824, Adjusted R-squared:  0.5284 
## F-statistic: 3.081 on 14 and 12 DF,  p-value: 0.02895

Eksplorasi Data

Hubungan tiap peubah x terhadap peubah y

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X1,y = Y),color="coral",shape=8, size=1) +
  geom_smooth(aes(x = X1, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Akses Air Minum Layak") +
  ylab("Indeks Kesehatan") +
  xlab("Akses Air Minum Layak (persen)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X2,y = Y),color="chocolate",shape=8, size=1) +
  geom_smooth(aes(x = X2, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Safety & Akses Sanitasi Layak") +
  ylab("Indeks Kesehatan") +
  xlab("Akses Sanitasi Layak (persen") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X3,y = Y),color="darkgoldenrod3",shape=8, size=1) +
  geom_smooth(aes(x = X3, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Jumlah Balita Kurus") +
  ylab("Indeks Kesehatan") +
  xlab("Jumlah Balita Kurus (orang)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X4,y = Y),color="deepskyblue4",shape=8, size=1) +
  geom_smooth(aes(x = X4, y =Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Jumlah Balita Kurang Gizi/Stunting") +
  ylab("Indeks Kesehatan") +
  xlab("Jumlah Balita Kurang Gizi/Stunting (orang)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X5,y = Y),color="blueviolet",shape=8, size=1) +
  geom_smooth(aes(x = X5, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Jumlah Bayi yang Mendapat Imunisasi") +
  ylab("Indeks Kesehatan") +
  xlab("Jumlah Bayi yang Mendapat Imunisasi (orang)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X6,y = Y),color="chartreuse4",shape=8, size=1) +
  geom_smooth(aes(x = X6, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Jumlah Kasus Hipertensi") +
  ylab("Indeks Kesehatan") +
  xlab("Jumlah Kasus Hipertensi (orang)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X7,y = Y),color="#B2182B",shape=8, size=1) +
  geom_smooth(aes(x = X7, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Jumlah Kasus Diare") +
  ylab("Indeks Kesehatan") +
  xlab("Jumlah Kasus Diare (orang)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X8,y = Y),color="#D6604D",shape=8, size=1) +
  geom_smooth(aes(x = X8, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Jumlah Kasus Bayi Sukar Bernafas") +
  ylab("Indeks Kesehatan") +
  xlab("Jumlah Kasus Bayi Sukar Bernafas (orang)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X9,y = Y),color="#F4A582",shape=8, size=1) +
  geom_smooth(aes(x = X9, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Jumlah Rumah Sakit") +
  ylab("Indeks Kesehatan") +
  xlab("Jumlah Rumah Sakit (unit)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X10,y = Y),color="#99CC00",shape=8, size=1) +
  geom_smooth(aes(x = X10, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Jumlah Dokter Umum") +
  ylab("Indeks Kesehatan") +
  xlab("Jumlah Dokter Umum (orang)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X11,y = Y),color="#92C5DE",shape=8, size=1) +
  geom_smooth(aes(x = X11, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Jumlah Posyandu") +
  ylab("Indeks Kesehatan") +
  xlab("Jumlah Posyandu (orang)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X12,y = Y),color="#92C5DE",shape=8, size=1) +
  geom_smooth(aes(x = X12, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Perilaku Hidup Sehat dan Bersih") +
  ylab("Indeks Kesehatan") +
  xlab("Perilaku Hidup Sehat dan Bersih (rumah tangga)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X13,y = Y),color="#92C5DE",shape=8, size=1) +
  geom_smooth(aes(x = X13, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Kepemilikan Jaminan Kesehatan") +
  ylab("Indeks Kesehatan") +
  xlab("Kepemilikan Jaminan Kesehatan (persen)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

y.bar <- mean(Y)

## Warning in mean.default(Y): argument is not numeric or logical: returning NA

interactive.plot <- ggplot(data) +
  geom_point(aes(x = X14,y = Y),color="#92C5DE",shape=8, size=1) +
  geom_smooth(aes(x = X14, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Indeks Kesehatan vs Jumlah Bidan") +
  ylab("Indeks Kesehatan") +
  xlab("Jumlah Bidan (orang)") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)

## `geom_smooth()` using formula = 'y ~ x'

Matriks Korelasi

cor_mat <- cor(model$model[, -1])
corrplot(cor_mat, method = 'number')

## Matriks Scatterplot

ggpairs(data,
        upper = list(continuous = wrap('cor', size = 2)),
        title = "Matriks Scatterplot")

Bar Chart

ggplot(data, aes(x = Y)) + 
  geom_histogram(binwidth = 0.05, color = "black", fill = "#69b3a2", alpha = 0.8, 
                 size = 0.1, position = "identity", show.legend = FALSE) +
  geom_vline(aes(xintercept = mean(Y)), color = "red", linetype = "dashed", size = 1) +
  geom_density(alpha = 0.2, fill = "#FF9999") +
  ggtitle("Sebaran Nilai Human Freedom Index") +
  xlab("Indeks Kesehatan") + ylab("Nilai") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"))

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Eksplorasi Kondisi Gauss-Markov

# Plot Sisaan vs yduga: ragam sisaan homogen
plot(model,1)

# Plot Sisaan vs Urutan: sisaan saling bebas
plot(x = 1:dim(data)[1],
     y = model$residuals,
     type = 'b', 
     ylab = "Residuals",
     xlab = "Observation")

# qq-plot: sisaan menyebar normal
plot(model,2)

Uji Formal Kondisi Gauss-Markov

# Asumsi Gauss-Markov: Nilai harapan sisaan sama dengan nol
t.test(model$residuals,
       mu = 0,
       conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  model$residuals
## t = -1.706e-16, df = 26, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.396341  0.396341
## sample estimates:
##    mean of x 
## -3.28955e-17

# Asumsi Gauss-Markov: Ragam Sisaan Homogen
ncvTest(model)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.130579, Df = 1, p = 0.71783

bptest(model)

## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 13.346, df = 14, p-value = 0.4995

# Asumsi Gauss-Markov: Sisaan saling bebas
runs.test(model$residuals)

## 
##  Runs Test
## 
## data:  model$residuals
## statistic = 1.6013, runs = 18, n1 = 13, n2 = 13, n = 26, p-value =
## 0.1093
## alternative hypothesis: nonrandomness

dwtest(model)

## 
##  Durbin-Watson test
## 
## data:  model
## DW = 1.8069, p-value = 0.2377
## alternative hypothesis: true autocorrelation is greater than 0

# Asumsi Normalitas Sisaan
shapiro.test(model$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  model$residuals
## W = 0.97726, p-value = 0.796

semua asumsi terpenuhi

Pemeriksaan Multikolinieritas

vif(model)

##         X1         X2         X3         X4         X5         X6         X7 
##   3.302520   1.384595  19.570860   8.544961 174.068025   3.591355 132.137241 
##         X8         X9        X10        X11        X12        X13        X14 
##  23.805753  15.924641   5.570065  16.191167   5.352401   4.257106   4.868067

terdapat multikol, hapus yg vif > 10

data_sub1 <- subset(data, select = -c(X5))
model_sub1 <- lm(Y ~ X1 + X2 + X3 + X4 + X6 + X7 + X8 + X9 + X10 + X11 + X12 + X13 + X14 , data = data)
vif(model_sub1)

##        X1        X2        X3        X4        X6        X7        X8        X9 
##  2.964318  1.368724 15.856794  7.459559  3.085236 30.572404 11.631770 12.883843 
##       X10       X11       X12       X13       X14 
##  3.384001 15.603791  5.174832  3.638962  4.853296

data_sub2 <- subset(data, select = -c(X5,X7))
model_sub2 <- lm(Y ~ X1 + X2 + X3 + X4 + X6 + X8 + X9 + X10 + X11 + X12 + X13 + X14 , data = data_sub1)
vif(model_sub2)

##        X1        X2        X3        X4        X6        X8        X9       X10 
##  2.807860  1.357820 14.042467  7.323159  2.926798  8.671966  4.619223  3.272025 
##       X11       X12       X13       X14 
## 15.529109  4.132278  3.571066  4.765114

data_sub3 <- subset(data, select = -c(X5,X7,X11))
model_sub3 <- lm(Y ~ X1 + X2 + X3 + X4 + X6 + X8 + X9 + X10 + X12 + X13 + X14 , data = data_sub2)
vif(model_sub3)

##        X1        X2        X3        X4        X6        X8        X9       X10 
##  2.568227  1.327547 13.832819  6.279583  1.365726  8.585269  4.561951  3.076567 
##       X12       X13       X14 
##  3.528399  2.276848  4.752695

data_sub4 <- subset(data, select = -c(X3,X5,X7,X11))
model_sub4 <- lm(Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X10 + X12 + X13 + X14 , data = data_sub3)
vif(model_sub4)

##       X1       X2       X4       X6       X8       X9      X10      X12 
## 2.530233 1.318418 2.412155 1.363352 2.782469 4.321527 2.779123 3.165828 
##      X13      X14 
## 2.276212 4.681330

Pemeriksaan Amatan Berpengaruh

Tabel hii dan ri

s <- sqrt(anova(model_sub4)["Residuals", "Mean Sq"])
n = dim(data_sub4)[1]
p = length(model_sub4$coefficients)
hii=hatvalues(model_sub4)
Obs = c(1:n)
ei = model_sub4$residuals
ri = ei/(s*sqrt(1-hii))
Di = (ri^2/p)*(hii/(1-hii))
summ <- cbind.data.frame(Obs, data_sub4, hii, ri, Di)
head(summ)

##   Obs     Y    X1     X2    X4     X6     X8 X9 X10    X12   X13  X14       hii
## 1   1 79.46 92.28  68.60 18666  12185 146427 30 596 552261 63.99 1950 0.7150874
## 2   2 79.29 85.70 100.00 10231 556178  47222  9 173 382327 55.99 1516 0.7406968
## 3   3 77.82 80.79  92.89  6871  58089  71337  5  90 534689 48.16 1476 0.5117364
## 4   4 83.09 93.79 100.00 21018 397464  77467 15 247 388891 58.02  876 0.6048098
## 5   5 79.77 78.15  93.47 30266 160510  22451  7  98 366250 43.32 1460 0.8045037
## 6   6 76.85 89.35  89.74 14122   5211  40549  2  32 226236 41.21 1140 0.1986898
##            ri           Di
## 1 -1.09577941 0.2739687805
## 2 -0.68092109 0.1204019151
## 3  0.43762837 0.0182477882
## 4 -0.05681326 0.0004490754
## 5  0.63123614 0.1490666559
## 6 -1.62083443 0.0592187928

Mendeteksi Titik Leverage

for (i in 1:dim(summ)[1]){
  cutoff <- 2*p/n
  leverage <- which(hii > cutoff)
}
leverage

## 21 
## 21

summ_leverage <- subset(summ, Obs %in% leverage)
summ_leverage <- subset(summ, Obs %in% leverage, select = c("Obs", "hii", "ri", "Di"))
summ_leverage

##    Obs       hii       ri       Di
## 21  21 0.9749494 -1.90088 12.78441

library(dplyr)

summ_leverage_sorted <- summ_leverage %>% 
  arrange(desc(hii))

summ_leverage_sorted

##   Obs       hii       ri       Di
## 1  21 0.9749494 -1.90088 12.78441

Mendeteksi Pencilan

for (i in 1:dim(summ)[1]){
  absri <- abs(summ$ri)
  pencilan <- which(absri > 2)
}
pencilan

## [1] 27

summ_pencilan <- subset(summ, Obs %in% pencilan)
summ_pencilan <- subset(summ, Obs %in% pencilan, select = c("Obs", "hii", "ri", "Di"))
summ_pencilan

##    Obs       hii        ri        Di
## 27  27 0.3633226 -2.075939 0.2235679

library(dplyr)

summ_pencilan_sorted <- summ_pencilan %>% 
  arrange(desc(hii))

summ_pencilan_sorted

##   Obs       hii        ri        Di
## 1  27 0.3633226 -2.075939 0.2235679

Mendeteksi Amatan berpengaruh

Cook’s D

for (i in 1:dim(summ)[1]){
  fcrit = qf(p=0.95, df1=p, df2=n-p)
  amatan_berpengaruh <- which(Di > fcrit)
}
amatan_berpengaruh

## 21 
## 21

summ <- cbind.data.frame(Obs, data_sub4, hii, ri, Di)
summ_sorted <- summ %>% 
  arrange(desc(Di))

head(summ_sorted)

##   Obs     Y    X1     X2    X4    X6     X8 X9  X10    X12   X13  X14       hii
## 1  21 84.23 99.03 100.00  6614 65357  54760 39 2286 110473 78.34 1261 0.9749494
## 2  23 85.35 98.28  93.03  4575 43665  42755 47  819  25860 78.42 1178 0.7229282
## 3  16 83.14 94.86  92.59  3899 32140  46815 53  556 583003 92.32 1632 0.6642859
## 4   1 79.46 92.28  68.60 18666 12185 146427 30  596 552261 63.99 1950 0.7150874
## 5   9 80.72 94.99  91.26 10635 29798  87575 12  321 206354 66.95 1560 0.4634679
## 6  27 79.22 97.67  98.98   846 10841   7042  4   39  47023 96.88  190 0.3633226
##          ri         Di
## 1 -1.900880 12.7844106
## 2  1.652256  0.6475376
## 3 -1.392284  0.3486973
## 4 -1.095779  0.2739688
## 5  1.767605  0.2453588
## 6 -2.075939  0.2235679

plot amatan berpengaruh

plot(model_sub4)

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced

Plot Titik Leverage dan Pencilan

ols_plot_resid_lev(model)

Menghapus Titik Leverage dan Pencilan

# Menggabungkan indeks titik leverage dan pencilan
observasi_normal <- setdiff(1:n, c(leverage, pencilan))

# Membuat data baru dengan menghapus titik leverage dan pencilan
data_clean <- data_sub4[observasi_normal,]

# Membuat model
model2 <- lm(Y ~ X1+X2+X4+X6+X8+X9+X10+X12+X13+X14 ,data= data_clean)
model2

## 
## Call:
## lm(formula = Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X10 + X12 + X13 + 
##     X14, data = data_clean)
## 
## Coefficients:
## (Intercept)           X1           X2           X4           X6           X8  
##   6.432e+01    1.497e-01    1.957e-02    7.938e-05    1.192e-06   -3.135e-05  
##          X9          X10          X12          X13          X14  
##  -1.809e-02    8.557e-03    5.013e-06    1.540e-02   -2.278e-03

summary(model2)

## 
## Call:
## lm(formula = Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X10 + X12 + X13 + 
##     X14, data = data_clean)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8675 -0.5187  0.3636  0.6103  1.2872 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.432e+01  7.109e+00   9.048 3.18e-07 ***
## X1           1.497e-01  6.249e-02   2.395   0.0311 *  
## X2           1.957e-02  2.206e-02   0.887   0.3900    
## X4           7.938e-05  5.151e-05   1.541   0.1456    
## X6           1.192e-06  2.124e-06   0.561   0.5836    
## X8          -3.134e-05  1.624e-05  -1.930   0.0741 .  
## X9          -1.809e-02  6.843e-02  -0.264   0.7953    
## X10          8.557e-03  4.523e-03   1.892   0.0794 .  
## X12          5.013e-06  2.897e-06   1.731   0.1055    
## X13          1.540e-02  2.468e-02   0.624   0.5426    
## X14         -2.278e-03  1.058e-03  -2.154   0.0492 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.16 on 14 degrees of freedom
## Multiple R-squared:  0.8232, Adjusted R-squared:  0.6969 
## F-statistic: 6.517 on 10 and 14 DF,  p-value: 0.0009137

ols_plot_resid_lev(model2)

Pemeriksaan Multikolinieritas

vif(model2)

##        X1        X2        X4        X6        X8        X9       X10       X12 
##  2.439064  1.340845  2.498109  1.407700  4.294924 14.236990 14.486095  3.933931 
##       X13       X14 
##  2.586411  4.142047

Terlihat ada beberapa multikol maka dilakukan penghapusan

dataclean_sub1 <- subset(data_clean, select = -c(X10))
modelclean_sub1 <- lm(Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14 , data = data_clean)
vif(modelclean_sub1)

##       X1       X2       X4       X6       X8       X9      X12      X13 
## 2.430352 1.329379 2.430080 1.385045 2.508107 3.004751 2.603956 2.434627 
##      X14 
## 4.137906

sudah tidak ada multikol

Eksplorasi asumsi

plot(modelclean_sub1,1)                # plot sisaan vs yduga

plot(modelclean_sub1,2)                # qq-plot

plot(x = 1:dim(dataclean_sub1)[1],
     y = modelclean_sub1$residuals,
     type = 'b', 
     ylab = "Residuals",
     xlab = "Observation")       # plot sisaan vs urutan

Uji Formal Kondisi Gauss-Markov

# Asumsi Gauss-Markov: Nilai harapan sisaan sama dengan nol
t.test(modelclean_sub1$residuals,
       mu = 0,
       conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  modelclean_sub1$residuals
## t = -1.1183e-17, df = 24, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.4098126  0.4098126
## sample estimates:
##     mean of x 
## -2.220446e-18

# Asumsi Gauss-Markov: Ragam Sisaan Homogen
ncvTest(modelclean_sub1)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.004054268, Df = 1, p = 0.94923

# Asumsi Gauss-Markov: Sisaan saling bebas
runs.test(modelclean_sub1$residuals)

## 
##  Runs Test
## 
## data:  modelclean_sub1$residuals
## statistic = 2.5045, runs = 19, n1 = 12, n2 = 12, n = 24, p-value =
## 0.01226
## alternative hypothesis: nonrandomness

# Asumsi Normalitas Sisaan
shapiro.test(modelclean_sub1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  modelclean_sub1$residuals
## W = 0.95321, p-value = 0.2958

model tidak memenuhi asumsi sisaan saling bebas, dilakukan transformasi dengan WLS

Weighted Least Square (WLS)

ads.weights=1/lm(abs(modelclean_sub1$residuals) ~ model2$fitted.values)$fitted.values^2
model_wls <- lm(Y~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14 ,data = dataclean_sub1,weights = ads.weights)
model_wls

## 
## Call:
## lm(formula = Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14, 
##     data = dataclean_sub1, weights = ads.weights)
## 
## Coefficients:
## (Intercept)           X1           X2           X4           X6           X8  
##   6.411e+01    1.418e-01    2.345e-02    9.459e-05    1.694e-06   -1.143e-05  
##          X9          X12          X13          X14  
##   9.654e-02    1.916e-06    2.717e-02   -2.345e-03

summary(model_wls)

## 
## Call:
## lm(formula = Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14, 
##     data = dataclean_sub1, weights = ads.weights)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5406 -0.7276  0.2297  0.8535  1.9823 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.411e+01  7.659e+00   8.370 4.91e-07 ***
## X1           1.418e-01  6.723e-02   2.109   0.0522 .  
## X2           2.345e-02  2.385e-02   0.983   0.3410    
## X4           9.459e-05  5.453e-05   1.735   0.1033    
## X6           1.694e-06  2.272e-06   0.746   0.4674    
## X8          -1.143e-05  1.338e-05  -0.854   0.4065    
## X9           9.654e-02  3.429e-02   2.815   0.0131 *  
## X12          1.916e-06  2.580e-06   0.742   0.4693    
## X13          2.717e-02  2.582e-02   1.053   0.3092    
## X14         -2.345e-03  1.142e-03  -2.054   0.0578 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.579 on 15 degrees of freedom
## Multiple R-squared:  0.7763, Adjusted R-squared:  0.642 
## F-statistic: 5.782 on 9 and 15 DF,  p-value: 0.00151

vif(model_wls)

##       X1       X2       X4       X6       X8       X9      X12      X13 
## 2.441806 1.332315 2.405286 1.378909 2.507796 2.980975 2.649303 2.414206 
##      X14 
## 4.117256

# Asumsi Gauss-Markov: Nilai harapan sisaan sama dengan nol
t.test(model_wls$residuals,
       mu = 0,
       conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  model_wls$residuals
## t = 0.013361, df = 24, p-value = 0.9895
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.4072019  0.4125082
## sample estimates:
##   mean of x 
## 0.002653187

# Asumsi Gauss-Markov: Ragam Sisaan Homogen
ncvTest(model_wls)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.01094565, Df = 1, p = 0.91668

# Asumsi Gauss-Markov: Sisaan saling bebas
runs.test(model_wls$residuals)

## 
##  Runs Test
## 
## data:  model_wls$residuals
## statistic = 2.5045, runs = 19, n1 = 12, n2 = 12, n = 24, p-value =
## 0.01226
## alternative hypothesis: nonrandomness

# Asumsi Normalitas Sisaan
shapiro.test(model_wls$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  model_wls$residuals
## W = 0.95091, p-value = 0.2628

model belum memenuhi asumsi, gunakan MKT

Metode Kuadrat Terkecil (MKT)

Y_cl <- as.matrix(dataclean_sub1$Y)
X_cl <- as.matrix(cbind(rep(1,25),dataclean_sub1$X1,dataclean_sub1$X2,
                        dataclean_sub1$X4,dataclean_sub1$X6,dataclean_sub1$X8,
                        dataclean_sub1$X9,dataclean_sub1$X12,dataclean_sub1$X13,
                        dataclean_sub1$X14))
b_cl <- solve(t(X_cl)%*%X_cl)%*%t(X_cl)%*%Y_cl;round(b_cl,5)

##           [,1]
##  [1,] 64.07752
##  [2,]  0.14262
##  [3,]  0.02343
##  [4,]  0.00010
##  [5,]  0.00000
##  [6,] -0.00001
##  [7,]  0.09689
##  [8,]  0.00000
##  [9,]  0.02672
## [10,] -0.00234

vif(modelclean_sub1)

##       X1       X2       X4       X6       X8       X9      X12      X13 
## 2.430352 1.329379 2.430080 1.385045 2.508107 3.004751 2.603956 2.434627 
##      X14 
## 4.137906

# Asumsi Gauss-Markov: Nilai harapan sisaan sama dengan nol
t.test(modelclean_sub1$residuals,
       mu = 0,
       conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  modelclean_sub1$residuals
## t = -1.1183e-17, df = 24, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.4098126  0.4098126
## sample estimates:
##     mean of x 
## -2.220446e-18

# Asumsi Gauss-Markov: Ragam Sisaan Homogen
ncvTest(modelclean_sub1)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.004054268, Df = 1, p = 0.94923

# Asumsi Gauss-Markov: Sisaan saling bebas
runs.test(modelclean_sub1$residuals)

## 
##  Runs Test
## 
## data:  modelclean_sub1$residuals
## statistic = 2.5045, runs = 19, n1 = 12, n2 = 12, n = 24, p-value =
## 0.01226
## alternative hypothesis: nonrandomness

# Asumsi Normalitas Sisaan
shapiro.test(modelclean_sub1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  modelclean_sub1$residuals
## W = 0.95321, p-value = 0.2958

Tranformasi Box-Cox

bc_model <- boxcox(Y~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14,data = dataclean_sub1,
         lambda = seq(-2, 2, by = 0.1))

(optimal_lambda <- bc_model$x[which.max(bc_model$y)])

## [1] 2

# Mengubah nilai Y di dataclean_sub1 dengan hasil transformasi
dataclean_sub1$Y <- (((dataclean_sub1$Y)^optimal_lambda)-1)/optimal_lambda

model3 <- lm(Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14 ,data= dataclean_sub1)
model3

## 
## Call:
## lm(formula = Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14, 
##     data = dataclean_sub1)
## 
## Coefficients:
## (Intercept)           X1           X2           X4           X6           X8  
##   1.916e+03    1.145e+01    1.967e+00    7.715e-03    1.365e-04   -9.286e-04  
##          X9          X12          X13          X14  
##   7.969e+00    1.431e-04    2.132e+00   -1.899e-01

# Asumsi Gauss-Markov: Nilai harapan sisaan sama dengan nol
t.test(model3$residuals,
       mu = 0,
       conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  model3$residuals
## t = 1.5936e-16, df = 24, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -33.12779  33.12779
## sample estimates:
##    mean of x 
## 2.557954e-15

# Asumsi Gauss-Markov: Ragam Sisaan Homogen
ncvTest(model3)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 0.0050767, Df = 1, p = 0.9432

# Asumsi Gauss-Markov: Sisaan saling bebas
runs.test(model3$residuals)

## 
##  Runs Test
## 
## data:  model3$residuals
## statistic = 2.5045, runs = 19, n1 = 12, n2 = 12, n = 24, p-value =
## 0.01226
## alternative hypothesis: nonrandomness

# Asumsi Normalitas Sisaan
shapiro.test(model3$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  model3$residuals
## W = 0.95146, p-value = 0.2705

masih tidak memenuhi asumsi sisaan saling bebas, gunakan OLS (menentukan model terbaik)

Ordinary Least Square (OLS)

Alternatif pembuatan model terbaik

dataclean_sub1 <- subset(data_clean, select = -c(X10))
modelclean_sub1 <- lm(Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14 , data = data_clean)

Forward

formula1 <- Y~.
step(lm(formula1,data=dataclean_sub1),direction="forward")

## Start:  AIC=18.62
## Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14

## 
## Call:
## lm(formula = Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14, 
##     data = dataclean_sub1)
## 
## Coefficients:
## (Intercept)           X1           X2           X4           X6           X8  
##   6.408e+01    1.426e-01    2.343e-02    9.546e-05    1.702e-06   -1.153e-05  
##          X9          X12          X13          X14  
##   9.689e-02    1.827e-06    2.672e-02   -2.342e-03

Backward

formula1 <- Y~.
step(lm(formula1,data=dataclean_sub1),direction="backward")

## Start:  AIC=18.62
## Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14
## 
##        Df Sum of Sq    RSS    AIC
## - X12   1    0.8085 24.465 17.459
## - X6    1    0.8778 24.534 17.530
## - X8    1    1.1613 24.818 17.817
## - X2    1    1.5311 25.187 18.187
## - X13   1    1.6748 25.331 18.329
## <none>              23.656 18.619
## - X4    1    4.7517 28.408 21.195
## - X14   1    6.6002 30.256 22.771
## - X1    1    7.0353 30.692 23.128
## - X9    1   12.7837 36.440 27.420
## 
## Step:  AIC=17.46
## Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X13 + X14
## 
##        Df Sum of Sq    RSS    AIC
## - X8    1    0.7743 25.239 16.238
## - X6    1    1.0831 25.548 16.542
## - X13   1    1.7231 26.188 17.160
## <none>              24.465 17.459
## - X2    1    2.1177 26.582 17.534
## - X4    1    4.4899 28.955 19.671
## - X14   1    5.7924 30.257 20.771
## - X1    1    6.2550 30.720 21.151
## - X9    1   14.1182 38.583 26.848
## 
## Step:  AIC=16.24
## Y ~ X1 + X2 + X4 + X6 + X9 + X13 + X14
## 
##        Df Sum of Sq    RSS    AIC
## - X6    1    1.1554 26.394 15.357
## - X13   1    1.4581 26.697 15.642
## - X2    1    1.8760 27.115 16.030
## <none>              25.239 16.238
## - X4    1    3.7244 28.963 17.679
## - X1    1    5.4836 30.723 19.153
## - X14   1   12.7233 37.962 24.443
## - X9    1   15.3401 40.579 26.110
## 
## Step:  AIC=15.36
## Y ~ X1 + X2 + X4 + X9 + X13 + X14
## 
##        Df Sum of Sq    RSS    AIC
## - X13   1    2.0578 28.452 15.234
## <none>              26.394 15.357
## - X2    1    3.4119 29.806 16.396
## - X1    1    5.1803 31.575 17.837
## - X4    1    5.4409 31.835 18.043
## - X14   1   12.1651 38.560 22.833
## - X9    1   14.7546 41.149 24.458
## 
## Step:  AIC=15.23
## Y ~ X1 + X2 + X4 + X9 + X14
## 
##        Df Sum of Sq    RSS    AIC
## <none>              28.452 15.234
## - X2    1     2.555 31.008 15.384
## - X4    1     3.920 32.372 16.460
## - X1    1     4.235 32.688 16.703
## - X14   1    21.828 50.281 27.469
## - X9    1    36.502 64.954 33.870

## 
## Call:
## lm(formula = Y ~ X1 + X2 + X4 + X9 + X14, data = dataclean_sub1)
## 
## Coefficients:
## (Intercept)           X1           X2           X4           X9          X14  
##   7.004e+01    9.854e-02    2.716e-02    7.289e-05    1.251e-01   -2.893e-03

Stepwise

step(lm(formula1,data=dataclean_sub1),direction="both")

## Start:  AIC=18.62
## Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X12 + X13 + X14
## 
##        Df Sum of Sq    RSS    AIC
## - X12   1    0.8085 24.465 17.459
## - X6    1    0.8778 24.534 17.530
## - X8    1    1.1613 24.818 17.817
## - X2    1    1.5311 25.187 18.187
## - X13   1    1.6748 25.331 18.329
## <none>              23.656 18.619
## - X4    1    4.7517 28.408 21.195
## - X14   1    6.6002 30.256 22.771
## - X1    1    7.0353 30.692 23.128
## - X9    1   12.7837 36.440 27.420
## 
## Step:  AIC=17.46
## Y ~ X1 + X2 + X4 + X6 + X8 + X9 + X13 + X14
## 
##        Df Sum of Sq    RSS    AIC
## - X8    1    0.7743 25.239 16.238
## - X6    1    1.0831 25.548 16.542
## - X13   1    1.7231 26.188 17.160
## <none>              24.465 17.459
## - X2    1    2.1177 26.582 17.534
## + X12   1    0.8085 23.656 18.619
## - X4    1    4.4899 28.955 19.671
## - X14   1    5.7924 30.257 20.771
## - X1    1    6.2550 30.720 21.151
## - X9    1   14.1182 38.583 26.848
## 
## Step:  AIC=16.24
## Y ~ X1 + X2 + X4 + X6 + X9 + X13 + X14
## 
##        Df Sum of Sq    RSS    AIC
## - X6    1    1.1554 26.394 15.357
## - X13   1    1.4581 26.697 15.642
## - X2    1    1.8760 27.115 16.030
## <none>              25.239 16.238
## + X8    1    0.7743 24.465 17.459
## - X4    1    3.7244 28.963 17.679
## + X12   1    0.4214 24.818 17.817
## - X1    1    5.4836 30.723 19.153
## - X14   1   12.7233 37.962 24.443
## - X9    1   15.3401 40.579 26.110
## 
## Step:  AIC=15.36
## Y ~ X1 + X2 + X4 + X9 + X13 + X14
## 
##        Df Sum of Sq    RSS    AIC
## - X13   1    2.0578 28.452 15.234
## <none>              26.394 15.357
## + X6    1    1.1554 25.239 16.238
## - X2    1    3.4119 29.806 16.396
## + X8    1    0.8466 25.548 16.542
## + X12   1    0.5614 25.833 16.820
## - X1    1    5.1803 31.575 17.837
## - X4    1    5.4409 31.835 18.043
## - X14   1   12.1651 38.560 22.833
## - X9    1   14.7546 41.149 24.458
## 
## Step:  AIC=15.23
## Y ~ X1 + X2 + X4 + X9 + X14
## 
##        Df Sum of Sq    RSS    AIC
## <none>              28.452 15.234
## + X13   1     2.058 26.394 15.357
## - X2    1     2.555 31.008 15.384
## + X6    1     1.755 26.697 15.642
## - X4    1     3.920 32.372 16.460
## + X12   1     0.723 27.729 16.590
## - X1    1     4.235 32.688 16.703
## + X8    1     0.535 27.918 16.760
## - X14   1    21.828 50.281 27.469
## - X9    1    36.502 64.954 33.870

## 
## Call:
## lm(formula = Y ~ X1 + X2 + X4 + X9 + X14, data = dataclean_sub1)
## 
## Coefficients:
## (Intercept)           X1           X2           X4           X9          X14  
##   7.004e+01    9.854e-02    2.716e-02    7.289e-05    1.251e-01   -2.893e-03

Best Subset Regression

best <- ols_step_best_subset(modelclean_sub1)
best

##           Best Subsets Regression           
## --------------------------------------------
## Model Index    Predictors
## --------------------------------------------
##      1         X13                           
##      2         X9 X14                        
##      3         X6 X9 X14                     
##      4         X1 X6 X9 X14                  
##      5         X1 X4 X6 X9 X14               
##      6         X1 X2 X4 X9 X13 X14           
##      7         X1 X2 X4 X6 X9 X13 X14        
##      8         X1 X2 X4 X6 X8 X9 X13 X14     
##      9         X1 X2 X4 X6 X8 X9 X12 X13 X14 
## --------------------------------------------
## 
##                                                    Subsets Regression Summary                                                   
## --------------------------------------------------------------------------------------------------------------------------------
##                        Adj.        Pred                                                                                          
## Model    R-Square    R-Square    R-Square     C(p)        AIC        SBIC        SBC        MSEP       FPE       HSP       APC  
## --------------------------------------------------------------------------------------------------------------------------------
##   1        0.3494      0.3211      0.1752    22.9546    102.4433    29.3996    106.1000    75.3696    3.2550    0.1370    0.7638 
##   2        0.6541      0.6227      0.5568     4.3677     88.6483    18.1107     93.5238    41.9771    1.8761    0.0798    0.4402 
##   3        0.6879      0.6433      0.5507     4.0829     88.0760    18.5464     94.1703    39.7663    1.8366    0.0792    0.4310 
##   4        0.7135      0.6562      0.5439     4.3564     87.9400    19.7387     95.2532    38.4314    1.8316    0.0803    0.4298 
##   5        0.7379      0.6689      0.5838     4.7079     87.7146    21.3702     96.2467    37.1115    1.8226    0.0817    0.4277 
##   6        0.7523      0.6697      0.5568     5.7363     88.3038    23.7820     98.0548    37.1385    1.8769    0.0863    0.4404 
##   7        0.7631      0.6656      0.5468     7.0036     89.1848    26.6002    100.1547    37.7323    1.9597    0.0928    0.4598 
##   8        0.7704      0.6556       0.491     8.5126     90.4058    29.7194    102.5945    39.0130    2.0795    0.1019    0.4879 
##   9        0.7780      0.6447      0.1905    10.0000     91.5657    33.0632    104.9733    40.4183    2.2079    0.1126    0.5181 
## --------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria 
##  SBIC: Sawa's Bayesian Information Criteria 
##  SBC: Schwarz Bayesian Criteria 
##  MSEP: Estimated error of prediction, assuming multivariate normality 
##  FPE: Final Prediction Error 
##  HSP: Hocking's Sp 
##  APC: Amemiya Prediction Criteria

didaptkan model terbaik yaitu Y ~ X4 + X8 + X10

data_best <- subset(dataclean_sub1, select = c(X1, X4, X6, X9, X14))
model_best <- lm(Y ~ X1+X4+X6+X9+X14, data = dataclean_sub1)
model_best

## 
## Call:
## lm(formula = Y ~ X1 + X4 + X6 + X9 + X14, data = dataclean_sub1)
## 
## Coefficients:
## (Intercept)           X1           X4           X6           X9          X14  
##   7.210e+01    1.015e-01    6.112e-05    2.957e-06    1.218e-01   -2.940e-03

Uji Formal Akhir Guass-Markov

# Asumsi Gauss-Markov: Nilai harapan sisaan sama dengan nol
t.test(model_best$residuals,
       mu = 0,
       conf.level = 0.95)

## 
##  One Sample t-test
## 
## data:  model_best$residuals
## t = 8.2337e-17, df = 24, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.4452691  0.4452691
## sample estimates:
##    mean of x 
## 1.776357e-17

# Asumsi Gauss-Markov: Ragam Sisaan Homogen
ncvTest(model_best)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 1.06012, Df = 1, p = 0.30319

# Asumsi Gauss-Markov: Sisaan saling bebas
runs.test(model_best$residuals)

## 
##  Runs Test
## 
## data:  model_best$residuals
## statistic = 0.83485, runs = 15, n1 = 12, n2 = 12, n = 24, p-value =
## 0.4038
## alternative hypothesis: nonrandomness

# Asumsi Normalitas Sisaan
shapiro.test(model_best$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  model_best$residuals
## W = 0.98815, p-value = 0.9884

Kesimpulan (Model Terbaik)

Persamaan Terbaik Regresi Bergandanya adalah:

\[y=b0-b1X1+b4X4+b6X6+b9X9+b14X14\] \[y= 72.1-0.1015X1+0.00006112X4+0.000002957X6+0.1218X9-0.002940X14\]