Tugas Akhir Analisis Regresi
Packages
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'olsrr'
##
## The following object is masked from 'package:datasets':
##
## rivers
## corrplot 0.92 loaded
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:olsrr':
##
## cement
##
## The following object is masked from 'package:plotly':
##
## select
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:car':
##
## logit
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
Data
# Load the workbook, pull the response (IPKM) and the ten candidate predictors
# into standalone vectors, then assemble them into one all-numeric data frame.
data <- read_excel("/Users/user/Downloads/Documents/Anreg /Tugas Akhir/DATASET ANREG2.xlsx")
Y <- data$IPKM
# Intercept column sized from the data rather than a hard-coded 27 rows.
X0 <- rep(1, nrow(data))
X1 <- data$melek
X2 <- data$miskin
X3 <- data$rumah_sakit
X4 <- data$kepadatan
X5 <- data$lama
X6 <- data$pengangguran
X7 <- data$upah
X8 <- data$PDRB
X9 <- data$sekolah
X10 <- data$sampah
# Build the frame directly from the vectors; cbind() would first squeeze every
# column into a single-type matrix. Column names stay Y, X1, ..., X10.
data <- data.frame(Y, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10)
# Force every column to numeric in place (keeps the data.frame structure).
data[] <- lapply(data, as.numeric)
data## Y X1 X2 X3 X4 X5 X6 X7 X8 X9 X10
## 1 79.46 98.38 474.7 30 1861 8.34 10.64 4217206 30173 6513 396.53
## 2 79.29 99.53 186.3 9 674 7.11 7.77 3125445 17953 5012 351.54
## 3 77.82 99.18 246.8 5 700 7.20 8.41 2699814 13590 4398 457.22
## 4 83.09 99.47 258.6 15 2136 9.08 6.98 3241930 23782 422 559.62
## 5 79.77 99.60 276.7 7 847 7.83 7.60 1975221 15991 3983 330.21
## 6 76.85 98.96 194.1 2 705 7.73 4.17 2326772 13829 3367 130.02
## 7 80.46 98.63 94.0 6 782 8.00 3.75 1897867 19169 3156 151.74
## 8 83.12 98.03 140.3 12 1003 7.88 9.81 1908102 15426 2783 262.64
## 9 80.72 94.81 266.1 12 2150 7.40 8.11 2279983 15342 2565 235.00
## 10 78.09 98.16 147.1 5 1004 7.49 4.16 2027619 18197 241 155.60
## 11 81.40 99.21 120.1 3 745 8.72 7.72 3241930 21972 2397 158.00
## 12 80.23 92.34 225.0 12 902 6.83 6.49 2391567 32727 2322 666.99
## 13 81.42 96.89 155.3 11 750 7.20 7.77 3064218 18580 2266 510.00
## 14 79.18 97.98 83.4 11 1036 8.11 8.75 4173569 47924 2077 163.79
## 15 80.95 98.29 199.9 26 1309 7.96 9.87 4798312 70840 2022 349.15
## 16 83.14 97.48 201.1 53 2570 9.53 10.31 4791844 82472 198 733.79
## 17 81.22 99.55 183.7 11 1439 8.22 9.63 3248283 18080 1942 311.89
## 18 79.83 98.74 37.9 1 383 8.03 1.56 1884364 19488 1871 63.28
## 19 83.28 99.78 79.2 22 9550 10.63 10.78 4330250 33153 1762 522.50
## 20 81.31 99.58 26.6 6 7377 10.14 8.83 2562434 26161 1064 180.09
## 21 84.23 99.88 109.8 39 14776 11.00 9.55 3774861 85820 1006 1573.44
## 22 81.14 98.56 31.5 11 8646 10.33 8.42 2304944 52838 814 176.98
## 23 85.35 99.79 137.4 47 12159 11.44 8.81 4816921 28283 660 402.88
## 24 84.49 99.42 64.4 26 10622 11.47 7.82 4377232 24756 532 1268.40
## 25 83.85 99.84 31.2 8 13557 11.21 10.77 3272668 42857 508 212.34
## 26 80.97 99.79 87.1 14 3988 8.23 6.62 2363390 22879 463 253.18
## 27 79.22 99.40 12.7 4 1576 8.78 5.53 1852100 16983 361 65.87
## 'data.frame': 27 obs. of 11 variables:
## $ Y : num 79.5 79.3 77.8 83.1 79.8 ...
## $ X1 : num 98.4 99.5 99.2 99.5 99.6 ...
## $ X2 : num 475 186 247 259 277 ...
## $ X3 : num 30 9 5 15 7 2 6 12 12 5 ...
## $ X4 : num 1861 674 700 2136 847 ...
## $ X5 : num 8.34 7.11 7.2 9.08 7.83 7.73 8 7.88 7.4 7.49 ...
## $ X6 : num 10.64 7.77 8.41 6.98 7.6 ...
## $ X7 : num 4217206 3125445 2699814 3241930 1975221 ...
## $ X8 : num 30173 17953 13590 23782 15991 ...
## $ X9 : num 6513 5012 4398 422 3983 ...
## $ X10: num 397 352 457 560 330 ...
Persamaan Awal Regresi
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10, data = data)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X5
## 8.942e+01 -1.576e-01 -4.597e-03 7.021e-02 -6.405e-05 7.230e-01
## X6 X7 X8 X9 X10
## 3.324e-01 -2.496e-07 -3.545e-05 -2.458e-04 1.370e-03
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.78117 -0.69425 0.00954 0.81977 1.72147
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.942e+01 1.746e+01 5.120 0.000103 ***
## X1 -1.576e-01 1.935e-01 -0.815 0.427202
## X2 -4.597e-03 4.539e-03 -1.013 0.326280
## X3 7.021e-02 4.033e-02 1.741 0.100886
## X4 -6.405e-05 1.603e-04 -0.400 0.694741
## X5 7.230e-01 5.773e-01 1.252 0.228423
## X6 3.324e-01 1.587e-01 2.094 0.052509 .
## X7 -2.496e-07 4.653e-07 -0.536 0.599069
## X8 -3.545e-05 1.849e-05 -1.917 0.073271 .
## X9 -2.458e-04 2.556e-04 -0.962 0.350359
## X10 1.370e-03 1.023e-03 1.339 0.199230
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.256 on 16 degrees of freedom
## Multiple R-squared: 0.7896, Adjusted R-squared: 0.6581
## F-statistic: 6.004 on 10 and 16 DF, p-value: 0.0008339
Eksplorasi Data
Hubungan tiap peubah x terhadap peubah y
## Warning in mean.default(Y): argument is not numeric or logical: returning NA
# Scatter plot of Y against X1 with a least-squares line (no confidence band),
# converted to an interactive plotly widget.
interactive.plot <- ggplot(data, aes(x = X1, y = Y)) +
  geom_point(color = "coral", shape = 8, size = 1) +
  geom_smooth(method = "lm", se = FALSE, color = "cornsilk3") +
  labs(
    title = "IPKM vs Angka Melek Huruf",
    x = "Angka Melek Huruf",
    y = "IPKM"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
## Warning in mean.default(Y): argument is not numeric or logical: returning NA
# Scatter plot of Y against X2 with a least-squares line (no confidence band),
# converted to an interactive plotly widget.
interactive.plot <- ggplot(data, aes(x = X2, y = Y)) +
  geom_point(color = "chocolate", shape = 8, size = 1) +
  geom_smooth(method = "lm", se = FALSE, color = "cornsilk3") +
  labs(
    title = "IPKM vs Jumlah Penduduk Miskin",
    x = "Jumlah Penduduk Miskin",
    y = "IPKM"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
## Warning in mean.default(Y): argument is not numeric or logical: returning NA
# Scatter plot of Y against X3 with a least-squares line (no confidence band),
# converted to an interactive plotly widget.
interactive.plot <- ggplot(data, aes(x = X3, y = Y)) +
  geom_point(color = "darkgoldenrod3", shape = 8, size = 1) +
  geom_smooth(method = "lm", se = FALSE, color = "cornsilk3") +
  labs(
    title = "IPKM vs Jumlah Rumah Sakit",
    x = "Jumlah Rumah Sakit",
    y = "IPKM"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
## Warning in mean.default(Y): argument is not numeric or logical: returning NA
# Scatter plot of Y against X4 with a least-squares line (no confidence band),
# converted to an interactive plotly widget.
interactive.plot <- ggplot(data, aes(x = X4, y = Y)) +
  geom_point(color = "deepskyblue4", shape = 8, size = 1) +
  geom_smooth(method = "lm", se = FALSE, color = "cornsilk3") +
  labs(
    title = "IPKM vs Kepadatan Penduduk",
    x = "Kepadatan Penduduk",
    y = "IPKM"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
## Warning in mean.default(Y): argument is not numeric or logical: returning NA
# Scatter plot of Y against X5 with a least-squares line (no confidence band),
# converted to an interactive plotly widget.
interactive.plot <- ggplot(data, aes(x = X5, y = Y)) +
  geom_point(color = "blueviolet", shape = 8, size = 1) +
  geom_smooth(method = "lm", se = FALSE, color = "cornsilk3") +
  labs(
    title = "IPKM vs Rata-Rata Lama Sekolah",
    x = "Rata-Rata Lama Sekolah",
    y = "IPKM"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
## Warning in mean.default(Y): argument is not numeric or logical: returning NA
# Scatter plot of Y against X6 with a least-squares line (no confidence band),
# converted to an interactive plotly widget.
interactive.plot <- ggplot(data, aes(x = X6, y = Y)) +
  geom_point(color = "chartreuse4", shape = 8, size = 1) +
  geom_smooth(method = "lm", se = FALSE, color = "cornsilk3") +
  labs(
    title = "IPKM vs Tingkat Pengangguran Terbuka",
    x = "Tingkat Pengangguran Terbuka",
    y = "IPKM"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
## Warning in mean.default(Y): argument is not numeric or logical: returning NA
# Scatter plot of Y against X7 with a least-squares line (no confidence band),
# converted to an interactive plotly widget.
interactive.plot <- ggplot(data, aes(x = X7, y = Y)) +
  geom_point(color = "#B2182B", shape = 8, size = 1) +
  geom_smooth(method = "lm", se = FALSE, color = "cornsilk3") +
  labs(
    title = "IPKM vs Upah Minimum",
    x = "Upah Minimum",
    y = "IPKM"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
## Warning in mean.default(Y): argument is not numeric or logical: returning NA
# Scatter plot of Y against X8 with a least-squares line (no confidence band),
# converted to an interactive plotly widget.
interactive.plot <- ggplot(data, aes(x = X8, y = Y)) +
  geom_point(color = "#D6604D", shape = 8, size = 1) +
  geom_smooth(method = "lm", se = FALSE, color = "cornsilk3") +
  labs(
    title = "IPKM vs PDRB Perkapita",
    x = "Size of PDRB Perkapita",
    y = "IPKM"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
## Warning in mean.default(Y): argument is not numeric or logical: returning NA
# Scatter plot of Y against X9 with a least-squares line (no confidence band),
# converted to an interactive plotly widget.
interactive.plot <- ggplot(data, aes(x = X9, y = Y)) +
  geom_point(color = "#F4A582", shape = 8, size = 1) +
  geom_smooth(method = "lm", se = FALSE, color = "cornsilk3") +
  labs(
    title = "IPKM vs Jumlah Sekolah",
    x = "Jumlah Sekolah",
    y = "IPKM"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
## Warning in mean.default(Y): argument is not numeric or logical: returning NA
# Scatter plot of Y against X10 with a least-squares line (no confidence band),
# converted to an interactive plotly widget.
# The title and axis label previously read "Sampang"; X10 is the `sampah`
# (waste) column, so both now read "Sampah".
interactive.plot <- ggplot(data) +
  geom_point(aes(x = X10, y = Y), color = "#99CC00", shape = 8, size = 1) +
  geom_smooth(aes(x = X10, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("IPKM vs Jumlah Sampah Ditangani") +
  ylab("IPKM") +
  xlab("Jumlah Sampah Ditangani") +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
boxplot tiap peubah x terhadap peubah y
# Box plots of X8 and X10 with fixed y-axis ranges.
# The two calls were fused on one line (a parse error), so each now stands on
# its own. The X10 title previously read "Tangan"; X10 is the `sampah` (waste)
# column, so it now reads "Sampah".
boxplot(data$X8,
        main = "PDRB per Kapita Atas Dasar\nHarga Konstan (Ribu Rupiah)",
        ylim = c(10000, 100000),
        col = "lightblue")
boxplot(data$X10,
        main = "Jumlah Sampah Ditangani (Ton per Hari)",
        ylim = c(50, 1600),
        col = "lightblue")
## Bar Chart
# Histogram of Y with a dashed vertical line at its mean and a density overlay.
# Line widths use `linewidth`; the `size` aesthetic for lines is deprecated
# since ggplot2 3.4.0 and triggered a lifecycle warning.
ggplot(data, aes(x = Y)) +
  geom_histogram(binwidth = 0.05, color = "black", fill = "#69b3a2", alpha = 0.8,
                 linewidth = 0.1, position = "identity", show.legend = FALSE) +
  geom_vline(aes(xintercept = mean(Y)), color = "red", linetype = "dashed",
             linewidth = 1) +
  geom_density(alpha = 0.2, fill = "#FF9999") +
  ggtitle("Sebaran Nilai IPKM") +
  xlab("IPKM") + ylab("Nilai") +
  theme_bw() +
  # Strip grid, border and panel background; keep only black axis lines.
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(colour = "black"))
Matriks Korelasi
## Matriks Scatterplot
Adanya minimal satu bintang (*) menandai bahwa variabel x tersebut
memiliki korelasi atau hubungan dengan Y. Dalam kasus ini X1 dan X2
tidak ditandai oleh bintang; maka dari itu, kedua peubah x tersebut
tidak memiliki korelasi, atau korelasinya sangat kecil, terhadap Y
sehingga bisa direduksi.
# Copy of the data without X1 and X2, and the regression of Y on the
# remaining predictors X3..X10 (the fit itself reads from `data`).
data_sub1 <- data[, setdiff(names(data), c("X1", "X2"))]
model_sub1 <- lm(
  Y ~ X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10,
  data = data
)
summary(model_sub1)##
## Call:
## lm(formula = Y ~ X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.73422 -0.57105 -0.04055 0.81106 1.79122
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.476e+01 3.929e+00 19.025 2.28e-13 ***
## X3 5.548e-02 3.415e-02 1.624 0.1217
## X4 -1.578e-05 1.530e-04 -0.103 0.9190
## X5 6.006e-01 5.020e-01 1.196 0.2471
## X6 3.018e-01 1.511e-01 1.998 0.0611 .
## X7 -2.502e-07 4.559e-07 -0.549 0.5899
## X8 -2.952e-05 1.747e-05 -1.690 0.1083
## X9 -4.208e-04 2.067e-04 -2.036 0.0568 .
## X10 1.260e-03 9.944e-04 1.267 0.2214
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.238 on 18 degrees of freedom
## Multiple R-squared: 0.7698, Adjusted R-squared: 0.6675
## F-statistic: 7.525 on 8 and 18 DF, p-value: 0.0001993
diperoleh model sbg berikut:
Pengecekan Multikolinieritas Awal
## X1 X2 X3 X4 X5 X6 X7 X8
## 1.727711 3.563164 4.942742 8.759942 11.518086 2.257942 3.634924 2.354651
## X9 X10
## 2.774077 2.071004
Reduksi peubah dengan nilai VIF > 10
## Y X1 X2 X3 X4 X6 X7 X8 X9 X10
## 1 79.46 98.38 474.7 30 1861 10.64 4217206 30173 6513 396.53
## 2 79.29 99.53 186.3 9 674 7.77 3125445 17953 5012 351.54
## 3 77.82 99.18 246.8 5 700 8.41 2699814 13590 4398 457.22
## 4 83.09 99.47 258.6 15 2136 6.98 3241930 23782 422 559.62
## 5 79.77 99.60 276.7 7 847 7.60 1975221 15991 3983 330.21
## 6 76.85 98.96 194.1 2 705 4.17 2326772 13829 3367 130.02
## 7 80.46 98.63 94.0 6 782 3.75 1897867 19169 3156 151.74
## 8 83.12 98.03 140.3 12 1003 9.81 1908102 15426 2783 262.64
## 9 80.72 94.81 266.1 12 2150 8.11 2279983 15342 2565 235.00
## 10 78.09 98.16 147.1 5 1004 4.16 2027619 18197 241 155.60
## 11 81.40 99.21 120.1 3 745 7.72 3241930 21972 2397 158.00
## 12 80.23 92.34 225.0 12 902 6.49 2391567 32727 2322 666.99
## 13 81.42 96.89 155.3 11 750 7.77 3064218 18580 2266 510.00
## 14 79.18 97.98 83.4 11 1036 8.75 4173569 47924 2077 163.79
## 15 80.95 98.29 199.9 26 1309 9.87 4798312 70840 2022 349.15
## 16 83.14 97.48 201.1 53 2570 10.31 4791844 82472 198 733.79
## 17 81.22 99.55 183.7 11 1439 9.63 3248283 18080 1942 311.89
## 18 79.83 98.74 37.9 1 383 1.56 1884364 19488 1871 63.28
## 19 83.28 99.78 79.2 22 9550 10.78 4330250 33153 1762 522.50
## 20 81.31 99.58 26.6 6 7377 8.83 2562434 26161 1064 180.09
## 21 84.23 99.88 109.8 39 14776 9.55 3774861 85820 1006 1573.44
## 22 81.14 98.56 31.5 11 8646 8.42 2304944 52838 814 176.98
## 23 85.35 99.79 137.4 47 12159 8.81 4816921 28283 660 402.88
## 24 84.49 99.42 64.4 26 10622 7.82 4377232 24756 532 1268.40
## 25 83.85 99.84 31.2 8 13557 10.77 3272668 42857 508 212.34
## 26 80.97 99.79 87.1 14 3988 6.62 2363390 22879 463 253.18
## 27 79.22 99.40 12.7 4 1576 5.53 1852100 16983 361 65.87
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X6 + X7 + X8 + X9 + X10,
## data = databaru2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7959 -0.5100 -0.1142 0.4960 2.0393
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.442e+01 1.728e+01 4.884 0.00014 ***
## X1 -5.032e-02 1.763e-01 -0.285 0.77879
## X2 -5.190e-03 4.589e-03 -1.131 0.27377
## X3 8.059e-02 4.012e-02 2.009 0.06073 .
## X4 9.733e-05 9.691e-05 1.004 0.32933
## X6 3.312e-01 1.613e-01 2.053 0.05578 .
## X7 -1.020e-07 4.576e-07 -0.223 0.82620
## X8 -3.872e-05 1.861e-05 -2.080 0.05293 .
## X9 -3.448e-04 2.471e-04 -1.396 0.18081
## X10 1.150e-03 1.024e-03 1.122 0.27726
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.277 on 17 degrees of freedom
## Multiple R-squared: 0.769, Adjusted R-squared: 0.6467
## F-statistic: 6.287 on 9 and 17 DF, p-value: 0.0006008
## X1 X2 X3 X4 X6 X7 X8 X9
## 1.388937 3.524327 4.733953 3.098775 2.257868 3.401927 2.307777 2.508917
## X10
## 2.009955
sudah tidak ada multikol
Eksplorasi Kondisi Gauss-Markov
# Residuals vs. observation order: visual check that the residuals are
# independent. seq_len(nrow(.)) replaces 1:dim(.)[1], which would yield
# c(1, 0) for an empty data frame.
plot(x = seq_len(nrow(databaru2)),
     y = model2$residuals,
     type = "b",
     ylab = "Residuals",
     xlab = "Observation")
Uji Formal Kondisi Gauss-Markov
# Gauss-Markov assumption: the residuals have an expected value of zero
# (two-sided one-sample t-test at the 95% confidence level).
t.test(model2$residuals, mu = 0, conf.level = 0.95)##
## One Sample t-test
##
## data: model2$residuals
## t = -8.2793e-17, df = 26, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.4083514 0.4083514
## sample estimates:
## mean of x
## -1.644775e-17
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 3.648612, Df = 1, p = 0.056116
##
## studentized Breusch-Pagan test
##
## data: model2
## BP = 13.918, df = 9, p-value = 0.1253
##
## Runs Test
##
## data: model2$residuals
## statistic = 1.201, runs = 17, n1 = 13, n2 = 13, n = 26, p-value =
## 0.2298
## alternative hypothesis: nonrandomness
##
## Durbin-Watson test
##
## data: model2
## DW = 2.3985, p-value = 0.6979
## alternative hypothesis: true autocorrelation is greater than 0
##
## Shapiro-Wilk normality test
##
## data: model2$residuals
## W = 0.9549, p-value = 0.2812
tidak memenuhi asumsi sisaan saling bebas, sehingga dilakukan pembersihan data dengan menghapus titik leverage dan pencilan
Pemeriksaan Amatan Berpengaruh
Tabel hii dan ri
# Influence diagnostics for model2, one row per observation:
#   hii - leverage (diagonal of the hat matrix)
#   ri  - internally studentized residual
#   Di  - Cook's distance
s <- sqrt(anova(model2)["Residuals", "Mean Sq"])  # residual standard error
n <- nrow(databaru2)
p <- length(model2$coefficients)                  # parameters incl. intercept
hii <- hatvalues(model2)
Obs <- seq_len(n)
# The residuals must come from the same fit as s, p and hii. They were
# previously taken from model_sub1 (a different predictor set), which made
# ri and Di mix two models.
# NOTE(review): tables printed in the rendered report predate this fix;
# re-knit to refresh the ri and Di values.
ei <- model2$residuals
ri <- ei / (s * sqrt(1 - hii))
Di <- (ri^2 / p) * (hii / (1 - hii))
summ <- cbind.data.frame(Obs, databaru2, hii, ri, Di)
head(summ)## Obs Y X1 X2 X3 X4 X6 X7 X8 X9 X10 hii
## 1 1 79.46 98.38 474.7 30 1861 10.64 4217206 30173 6513 396.53 0.5774583
## 2 2 79.29 99.53 186.3 9 674 7.77 3125445 17953 5012 351.54 0.2655017
## 3 3 77.82 99.18 246.8 5 700 8.41 2699814 13590 4398 457.22 0.1955294
## 4 4 83.09 99.47 258.6 15 2136 6.98 3241930 23782 422 559.62 0.4927879
## 5 5 79.77 99.60 276.7 7 847 7.60 1975221 15991 3983 330.21 0.2597117
## 6 6 76.85 98.96 194.1 2 705 4.17 2326772 13829 3367 130.02 0.2240285
## ri Di
## 1 -1.1648160 0.1854238755
## 2 0.3707866 0.0049696339
## 3 -1.4986531 0.0545888833
## 4 1.0546740 0.1080704244
## 5 -0.1215778 0.0005185603
## 6 -1.4816106 0.0633761396
Mendeteksi Titik Leverage
## 21
## 21
# Diagnostic columns for the observations listed in `leverage`.
# The earlier full-width subset was overwritten immediately, so only the
# final assignment is kept.
summ_leverage <- subset(summ, Obs %in% leverage,
                        select = c("Obs", "hii", "ri", "Di"))
summ_leverage## Obs hii ri Di
## 21 21 0.7595908 -0.04470642 0.0006314928
## Obs hii ri Di
## 1 21 0.7595908 -0.04470642 0.0006314928
Mendeteksi Pencilan
## integer(0)
# Diagnostic columns for the observations listed in `pencilan` (outliers).
# The earlier full-width subset was overwritten immediately, so only the
# final assignment is kept.
summ_pencilan <- subset(summ, Obs %in% pencilan,
                        select = c("Obs", "hii", "ri", "Di"))
summ_pencilan## [1] Obs hii ri Di
## <0 rows> (or 0-length row.names)
## [1] Obs hii ri Di
## <0 rows> (or 0-length row.names)
Mendeteksi Amatan berpengaruh
Cook’s D
# Influential observations: Cook's distance above the 0.95 quantile of the
# F(p, n - p) distribution. Neither the cut-off nor the comparison depends on
# a loop index, so both are computed once instead of once per row.
fcrit <- qf(p = 0.95, df1 = p, df2 = n - p)
amatan_berpengaruh <- which(Di > fcrit)
amatan_berpengaruh## named integer(0)
# Rebuild the diagnostics table, then order it from the largest to the
# smallest Cook's distance.
summ <- cbind.data.frame(Obs, databaru2, hii, ri, Di)
summ_sorted <- arrange(summ, desc(Di))
head(summ_sorted)## Obs Y X1 X2 X3 X4 X6 X7 X8 X9 X10 hii
## 1 8 83.12 98.03 140.3 12 1003 9.81 1908102 15426 2783 262.64 0.4682587
## 2 1 79.46 98.38 474.7 30 1861 10.64 4217206 30173 6513 396.53 0.5774583
## 3 18 79.83 98.74 37.9 1 383 1.56 1884364 19488 1871 63.28 0.3930880
## 4 23 85.35 99.79 137.4 47 12159 8.81 4816921 28283 660 402.88 0.7202437
## 5 4 83.09 99.47 258.6 15 2136 6.98 3241930 23782 422 559.62 0.4927879
## 6 25 83.85 99.84 31.2 8 13557 10.77 3272668 42857 508 212.34 0.5021246
## ri Di
## 1 1.9241735 0.3260422
## 2 -1.1648160 0.1854239
## 3 1.4908770 0.1439620
## 4 0.6763291 0.1177649
## 5 1.0546740 0.1080704
## 6 1.0091493 0.1027074
Menghapus Titik Leverage dan Pencilan
## Y X1 X2 X3 X4 X6 X7 X8 X9 X10
## 1 79.46 98.38 474.7 30 1861 10.64 4217206 30173 6513 396.53
## 2 79.29 99.53 186.3 9 674 7.77 3125445 17953 5012 351.54
## 3 77.82 99.18 246.8 5 700 8.41 2699814 13590 4398 457.22
## 5 79.77 99.60 276.7 7 847 7.60 1975221 15991 3983 330.21
## 6 76.85 98.96 194.1 2 705 4.17 2326772 13829 3367 130.02
## 7 80.46 98.63 94.0 6 782 3.75 1897867 19169 3156 151.74
## 8 83.12 98.03 140.3 12 1003 9.81 1908102 15426 2783 262.64
## 9 80.72 94.81 266.1 12 2150 8.11 2279983 15342 2565 235.00
## 10 78.09 98.16 147.1 5 1004 4.16 2027619 18197 241 155.60
## 11 81.40 99.21 120.1 3 745 7.72 3241930 21972 2397 158.00
## 12 80.23 92.34 225.0 12 902 6.49 2391567 32727 2322 666.99
## 13 81.42 96.89 155.3 11 750 7.77 3064218 18580 2266 510.00
## 14 79.18 97.98 83.4 11 1036 8.75 4173569 47924 2077 163.79
## 15 80.95 98.29 199.9 26 1309 9.87 4798312 70840 2022 349.15
## 16 83.14 97.48 201.1 53 2570 10.31 4791844 82472 198 733.79
## 17 81.22 99.55 183.7 11 1439 9.63 3248283 18080 1942 311.89
## 18 79.83 98.74 37.9 1 383 1.56 1884364 19488 1871 63.28
## 19 83.28 99.78 79.2 22 9550 10.78 4330250 33153 1762 522.50
## 20 81.31 99.58 26.6 6 7377 8.83 2562434 26161 1064 180.09
## 22 81.14 98.56 31.5 11 8646 8.42 2304944 52838 814 176.98
## 23 85.35 99.79 137.4 47 12159 8.81 4816921 28283 660 402.88
## 24 84.49 99.42 64.4 26 10622 7.82 4377232 24756 532 1268.40
## 25 83.85 99.84 31.2 8 13557 10.77 3272668 42857 508 212.34
## 26 80.97 99.79 87.1 14 3988 6.62 2363390 22879 463 253.18
## 27 79.22 99.40 12.7 4 1576 5.53 1852100 16983 361 65.87
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X6 + X7 + X8 + X9 + X10,
## data = data6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5461 -0.6940 -0.1474 0.8419 1.7845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.918e+01 1.769e+01 5.606 5.01e-05 ***
## X1 -2.011e-01 1.805e-01 -1.114 0.2828
## X2 -1.276e-02 5.113e-03 -2.496 0.0247 *
## X3 1.163e-01 3.865e-02 3.010 0.0088 **
## X4 1.078e-04 8.976e-05 1.201 0.2483
## X6 3.690e-01 1.472e-01 2.506 0.0242 *
## X7 -2.693e-07 4.700e-07 -0.573 0.5751
## X8 -3.839e-05 2.128e-05 -1.804 0.0913 .
## X9 1.035e-04 2.877e-04 0.360 0.7241
## X10 6.815e-04 1.230e-03 0.554 0.5878
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.14 on 15 degrees of freedom
## Multiple R-squared: 0.8146, Adjusted R-squared: 0.7033
## F-statistic: 7.321 on 9 and 15 DF, p-value: 0.0004259
Pemeriksaan Multikolinieritas
## X1 X2 X3 X4 X6 X7 X8 X9
## 1.755367 5.210890 4.825066 2.549684 2.293520 4.405086 2.674318 4.016978
## X10
## 1.897521
Uji Formal Kondisi Gauss-Markov
# Gauss-Markov assumption: the residuals have an expected value of zero
# (two-sided one-sample t-test at the 95% confidence level).
t.test(model3$residuals, mu = 0, conf.level = 0.95)##
## One Sample t-test
##
## data: model3$residuals
## t = 0, df = 24, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.3721325 0.3721325
## sample estimates:
## mean of x
## 0
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 2.372034, Df = 1, p = 0.12353
##
## studentized Breusch-Pagan test
##
## data: model3
## BP = 12.604, df = 9, p-value = 0.1814
##
## Runs Test
##
## data: model3$residuals
## statistic = 0.83485, runs = 15, n1 = 12, n2 = 12, n = 24, p-value =
## 0.4038
## alternative hypothesis: nonrandomness
##
## Durbin-Watson test
##
## data: model3
## DW = 2.2241, p-value = 0.4988
## alternative hypothesis: true autocorrelation is greater than 0
##
## Shapiro-Wilk normality test
##
## data: model3$residuals
## W = 0.97434, p-value = 0.7554
model memenuhi semua asumsi
Penentuan Model Terbaik
Forward
## Start: AIC=13.8
## Y ~ X1 + X2 + X3 + X4 + X6 + X7 + X8 + X9 + X10
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X6 + X7 + X8 + X9 + X10,
## data = data6)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X6
## 9.918e+01 -2.011e-01 -1.276e-02 1.163e-01 1.078e-04 3.690e-01
## X7 X8 X9 X10
## -2.693e-07 -3.839e-05 1.035e-04 6.815e-04
Backward
## Start: AIC=13.8
## Y ~ X1 + X2 + X3 + X4 + X6 + X7 + X8 + X9 + X10
##
## Df Sum of Sq RSS AIC
## - X9 1 0.1682 19.674 12.011
## - X10 1 0.3990 19.905 12.303
## - X7 1 0.4269 19.933 12.338
## - X1 1 1.6137 21.120 13.783
## <none> 19.506 13.796
## - X4 1 1.8764 21.382 14.092
## - X8 1 4.2325 23.738 16.706
## - X2 1 8.0995 27.605 20.479
## - X6 1 8.1686 27.675 20.541
## - X3 1 11.7783 31.284 23.606
##
## Step: AIC=12.01
## Y ~ X1 + X2 + X3 + X4 + X6 + X7 + X8 + X10
##
## Df Sum of Sq RSS AIC
## - X7 1 0.3349 20.009 10.433
## - X10 1 0.4052 20.079 10.521
## - X1 1 1.4525 21.127 11.792
## <none> 19.674 12.011
## - X4 1 1.7581 21.432 12.151
## - X8 1 4.5495 24.224 15.211
## - X6 1 8.3466 28.021 18.852
## - X3 1 13.1918 32.866 22.839
## - X2 1 13.3745 33.049 22.978
##
## Step: AIC=10.43
## Y ~ X1 + X2 + X3 + X4 + X6 + X8 + X10
##
## Df Sum of Sq RSS AIC
## - X10 1 0.2230 20.232 8.710
## <none> 20.009 10.433
## - X4 1 1.8031 21.812 10.590
## - X1 1 2.3395 22.349 11.197
## - X8 1 6.1189 26.128 15.103
## - X6 1 8.0798 28.089 16.913
## - X2 1 13.7784 33.788 21.530
## - X3 1 13.8407 33.850 21.576
##
## Step: AIC=8.71
## Y ~ X1 + X2 + X3 + X4 + X6 + X8
##
## Df Sum of Sq RSS AIC
## <none> 20.232 8.7100
## - X4 1 2.0157 22.248 9.0843
## - X1 1 3.0666 23.299 10.2381
## - X8 1 7.0413 27.273 14.1760
## - X6 1 8.4260 28.658 15.4141
## - X2 1 13.9156 34.148 19.7956
## - X3 1 19.1968 39.429 23.3906
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X6 + X8, data = data6)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X6
## 1.026e+02 -2.372e-01 -1.163e-02 1.087e-01 1.096e-04 3.566e-01
## X8
## -4.494e-05
Stepwise
## Start: AIC=13.8
## Y ~ X1 + X2 + X3 + X4 + X6 + X7 + X8 + X9 + X10
##
## Df Sum of Sq RSS AIC
## - X9 1 0.1682 19.674 12.011
## - X10 1 0.3990 19.905 12.303
## - X7 1 0.4269 19.933 12.338
## - X1 1 1.6137 21.120 13.783
## <none> 19.506 13.796
## - X4 1 1.8764 21.382 14.092
## - X8 1 4.2325 23.738 16.706
## - X2 1 8.0995 27.605 20.479
## - X6 1 8.1686 27.675 20.541
## - X3 1 11.7783 31.284 23.606
##
## Step: AIC=12.01
## Y ~ X1 + X2 + X3 + X4 + X6 + X7 + X8 + X10
##
## Df Sum of Sq RSS AIC
## - X7 1 0.3349 20.009 10.433
## - X10 1 0.4052 20.079 10.521
## - X1 1 1.4525 21.127 11.792
## <none> 19.674 12.011
## - X4 1 1.7581 21.432 12.151
## + X9 1 0.1682 19.506 13.796
## - X8 1 4.5495 24.224 15.211
## - X6 1 8.3466 28.021 18.852
## - X3 1 13.1918 32.866 22.839
## - X2 1 13.3745 33.049 22.978
##
## Step: AIC=10.43
## Y ~ X1 + X2 + X3 + X4 + X6 + X8 + X10
##
## Df Sum of Sq RSS AIC
## - X10 1 0.2230 20.232 8.710
## <none> 20.009 10.433
## - X4 1 1.8031 21.812 10.590
## - X1 1 2.3395 22.349 11.197
## + X7 1 0.3349 19.674 12.011
## + X9 1 0.0762 19.933 12.338
## - X8 1 6.1189 26.128 15.103
## - X6 1 8.0798 28.089 16.913
## - X2 1 13.7784 33.788 21.530
## - X3 1 13.8407 33.850 21.576
##
## Step: AIC=8.71
## Y ~ X1 + X2 + X3 + X4 + X6 + X8
##
## Df Sum of Sq RSS AIC
## <none> 20.232 8.7100
## - X4 1 2.0157 22.248 9.0843
## - X1 1 3.0666 23.299 10.2381
## + X10 1 0.2230 20.009 10.4329
## + X7 1 0.1527 20.079 10.5206
## + X9 1 0.0999 20.132 10.5863
## - X8 1 7.0413 27.273 14.1760
## - X6 1 8.4260 28.658 15.4141
## - X2 1 13.9156 34.148 19.7956
## - X3 1 19.1968 39.429 23.3906
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X6 + X8, data = data6)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X6
## 1.026e+02 -2.372e-01 -1.163e-02 1.087e-01 1.096e-04 3.566e-01
## X8
## -4.494e-05
Best Subset Regression
## Best Subsets Regression
## ------------------------------------------
## Model Index Predictors
## ------------------------------------------
## 1 X4
## 2 X3 X4
## 3 X2 X3 X4
## 4 X2 X3 X6 X8
## 5 X1 X2 X3 X6 X8
## 6 X1 X2 X3 X4 X6 X8
## 7 X1 X2 X3 X4 X6 X8 X10
## 8 X1 X2 X3 X4 X6 X7 X8 X10
## 9 X1 X2 X3 X4 X6 X7 X8 X9 X10
## ------------------------------------------
##
## Subsets Regression Summary
## -------------------------------------------------------------------------------------------------------------------------------
## Adj. Pred
## Model R-Square R-Square R-Square C(p) AIC SBIC SBC MSEP FPE HSP APC
## -------------------------------------------------------------------------------------------------------------------------------
## 1 0.5245 0.5039 0.4492 17.4596 94.2821 21.6904 97.9387 54.3778 2.3484 0.0988 0.5582
## 2 0.6524 0.6208 0.5893 9.1168 88.4511 16.8145 93.3266 41.6473 1.8614 0.0791 0.4424
## 3 0.6938 0.6500 0.6053 7.7720 87.2848 16.4113 93.3792 38.5275 1.7794 0.0767 0.4229
## 4 0.7636 0.7164 0.6512 4.1191 82.8090 14.7488 90.1223 31.3006 1.4917 0.0654 0.3545
## 5 0.7885 0.7328 0.6672 4.1085 82.0312 16.1940 90.5634 29.5650 1.4520 0.0651 0.3451
## 6 0.8077 0.7435 0.6557 4.5584 81.6569 18.4694 91.4079 28.4679 1.4387 0.0661 0.3419
## 7 0.8098 0.7314 0.6378 6.3869 83.3798 21.6484 94.3497 29.9137 1.5537 0.0736 0.3693
## 8 0.8130 0.7194 0.5844 8.1294 84.9579 24.9031 97.1466 31.3740 1.6723 0.0820 0.3975
## 9 0.8146 0.7033 0.5488 10.0000 86.7432 28.2407 100.1508 33.3275 1.8206 0.0929 0.4327
## -------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria
## SBIC: Sawa's Bayesian Information Criteria
## SBC: Schwarz Bayesian Criteria
## MSEP: Estimated error of prediction, assuming multivariate normality
## FPE: Final Prediction Error
## HSP: Hocking's Sp
## APC: Amemiya Prediction Criteria
didapatkan model terbaik yaitu Y ~ X1 + X2 + X3 + X4 + X6 + X8
# Predictor columns of the selected model, and the selected model itself
# fitted on data6.
data_best <- data6[, c("X1", "X2", "X3", "X4", "X6", "X8")]
model_best <- lm(
  Y ~ X1 + X2 + X3 + X4 + X6 + X8,
  data = data6
)
model_best##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X6 + X8, data = data6)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X6
## 1.026e+02 -2.372e-01 -1.163e-02 1.087e-01 1.096e-04 3.566e-01
## X8
## -4.494e-05
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X6 + X8, data = data6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4758 -0.5416 -0.0876 0.6924 1.5656
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.026e+02 1.419e+01 7.229 1.01e-06 ***
## X1 -2.372e-01 1.436e-01 -1.652 0.115924
## X2 -1.163e-02 3.306e-03 -3.519 0.002453 **
## X3 1.087e-01 2.631e-02 4.133 0.000625 ***
## X4 1.096e-04 8.187e-05 1.339 0.197186
## X6 3.566e-01 1.302e-01 2.738 0.013515 *
## X8 -4.494e-05 1.795e-05 -2.503 0.022175 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.06 on 18 degrees of freedom
## Multiple R-squared: 0.8077, Adjusted R-squared: 0.7435
## F-statistic: 12.6 on 6 and 18 DF, p-value: 1.356e-05
Uji Formal Akhir Gauss-Markov
# Gauss-Markov assumption: the residuals have an expected value of zero
# (two-sided one-sample t-test at the 95% confidence level).
t.test(model_best$residuals, mu = 0, conf.level = 0.95)##
## One Sample t-test
##
## data: model_best$residuals
## t = 2.4184e-16, df = 24, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.3789956 0.3789956
## sample estimates:
## mean of x
## 4.440892e-17
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 2.121018, Df = 1, p = 0.14529
##
## studentized Breusch-Pagan test
##
## data: model_best
## BP = 12.277, df = 6, p-value = 0.05607
##
## Runs Test
##
## data: model_best$residuals
## statistic = -0.83485, runs = 11, n1 = 12, n2 = 12, n = 24, p-value =
## 0.4038
## alternative hypothesis: nonrandomness
##
## Durbin-Watson test
##
## data: model_best
## DW = 2.282, p-value = 0.6181
## alternative hypothesis: true autocorrelation is greater than 0
##
## Shapiro-Wilk normality test
##
## data: model_best$residuals
## W = 0.95345, p-value = 0.2995
Kesimpulan (Model Terbaik)
Persamaan Terbaik Regresi Bergandanya adalah:
\[Y=\beta_0+\beta_1X_1+\beta_2X_2+\beta_3X_3+\beta_4X_4+\beta_6X_6+\beta_8X_8+\varepsilon\] \[\hat{Y}= 102.6 - 0.2372X_1 - 0.01163X_2 + 0.1087X_3 + 0.0001096X_4 + 0.3566X_6 - 0.00004494X_8\]