library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(randtests)
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.3.3
library(GGally)
## Warning: package 'GGally' was built under R version 4.3.2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(olsrr)
## Warning: package 'olsrr' was built under R version 4.3.2
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
##
## rivers
library(nortest)
library(MASS)
## Warning: package 'MASS' was built under R version 4.3.3
##
## Attaching package: 'MASS'
## The following object is masked from 'package:olsrr':
##
## cement
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: lattice
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ✖ car::recode() masks dplyr::recode()
## ✖ MASS::select() masks plotly::select(), dplyr::select()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(readxl)
data_pdrb <- read_xlsx("C:/Users/Alista/Documents/Semester 4/ANREG/Data ANREG Kelompok 7 UAS.xlsx")
## New names:
## • `` -> `...17`
## • `` -> `...18`
Y <- (data_pdrb$`PDRB Awal`)/1000
X0 <- rep(1,27)
X1 <- (data_pdrb$`Balita Stunting`)/1000
X2 <- data_pdrb$`Akses Sanitasi Layak`
X3 <- data_pdrb$`Usia Harapan Hidup`
X4 <- data_pdrb$`Rata-rata Lama Sekolah`
X5 <- (data_pdrb$`Pengeluaran per Kapita`)/1000
X6 <- (data_pdrb$`Jumlah Angkatan Kerja`)/1000
X7 <- data_pdrb$`Partisipasi Angkatan Kerja`
X8 <- data_pdrb$`Tingkat Pengangguran Terbuka`
dt<-data.frame(Y,X1,X2,X3,X4,X5,X6,X7,X8)
dt
## Y X1 X2 X3 X4 X5 X6 X7 X8
## 1 167.96618 18.666 73.47 71.65 8.34 10.860 2897.332 63.75 10.64
## 2 50.38872 10.231 63.32 71.54 7.11 9.210 1313.905 69.11 7.77
## 3 34.55640 6.871 61.76 70.58 7.20 8.244 1222.589 69.98 8.41
## 4 88.43796 21.018 70.85 74.01 9.08 10.588 1808.799 63.64 6.98
## 5 42.01296 30.266 51.90 71.85 7.83 8.227 1330.353 68.84 7.60
## 6 26.36166 14.122 54.39 69.95 7.73 8.177 940.713 67.83 4.17
## 7 23.91889 1.872 75.50 72.30 8.00 9.428 664.523 68.47 3.75
## 8 18.45034 4.798 69.29 74.03 7.88 9.620 530.825 61.80 9.81
## 9 35.52378 10.635 86.76 72.47 7.40 10.791 1110.529 65.53 8.11
## 10 24.30077 3.063 81.26 70.76 7.49 9.950 644.128 66.21 4.16
## 11 25.64185 6.316 80.59 72.91 8.72 10.776 609.471 64.63 7.72
## 12 61.25961 3.797 86.64 72.15 6.83 10.166 952.841 69.08 6.49
## 13 30.18158 1.188 80.02 72.92 7.20 11.294 868.132 68.87 7.77
## 14 49.29337 1.438 82.46 71.47 8.11 12.193 472.075 65.21 8.75
## 15 177.47089 2.768 85.10 72.62 7.96 11.927 1195.947 65.51 9.87
## 16 265.13082 3.899 89.99 74.04 9.53 11.757 2006.507 65.41 10.31
## 17 33.39329 9.304 50.96 72.79 8.22 9.044 819.559 64.37 9.63
## 18 8.42640 0.519 83.00 71.89 8.03 9.389 260.761 79.92 1.56
## 19 35.25887 1.743 67.93 74.13 10.63 12.058 556.541 64.21 10.78
## 20 9.32416 0.806 45.80 72.85 10.14 11.229 159.618 62.48 8.83
## 21 211.24937 6.614 49.85 74.75 11.00 17.639 1435.635 69.42 9.55
## 22 18.03025 2.381 90.62 72.74 10.33 12.087 163.639 65.42 8.42
## 23 73.26065 4.575 90.80 75.48 11.44 16.239 1592.545 65.33 8.81
## 24 52.56498 3.637 96.21 74.92 11.47 15.926 1258.739 63.35 7.82
## 25 24.65273 3.036 77.18 74.50 11.21 12.500 320.574 67.22 10.77
## 26 16.78104 5.633 52.62 72.63 9.53 10.578 347.063 65.99 6.62
## 27 3.50625 0.846 88.26 71.49 8.78 10.967 94.831 63.76 5.53
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
#Plot Y vs X1
y.bar <- mean(Y)
interactive.plot1 <- ggplot(dt) +
geom_point(aes(x = X1,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X1, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X1") +
ylab("Y") +
xlab("X1") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot1)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X2
y.bar <- mean(Y)
interactive.plot2 <- ggplot(dt) +
geom_point(aes(x = X2,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X2, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X2") +
ylab("Y") +
xlab("X2") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot2)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X3
y.bar <- mean(Y)
interactive.plot3 <- ggplot(dt) +
geom_point(aes(x = X3,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X3, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y") +
ylab("Y") +
xlab("X3") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot3)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X4
y.bar <- mean(Y)
interactive.plot4 <- ggplot(dt) +
geom_point(aes(x = X4,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X4, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X4") +
ylab("Y") +
xlab("X4") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot4)
## `geom_smooth()` using formula = 'y ~ x'
# Menggabungkan scatter plot ke dalam satu layout
grid.arrange(interactive.plot1, interactive.plot2, interactive.plot3, interactive.plot4, nrow = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X5
y.bar <- mean(Y)
interactive.plot5 <- ggplot(dt) +
geom_point(aes(x = X5,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X5, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X5") +
ylab("Y") +
xlab("X5") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot5)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X6
y.bar <- mean(Y)
interactive.plot6 <- ggplot(dt) +
geom_point(aes(x = X6,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X6, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X6") +
ylab("Y") +
xlab("X6") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot6)
## `geom_smooth()` using formula = 'y ~ x'
##Plot Y vs X7
y.bar <- mean(Y)
interactive.plot7 <- ggplot(dt) +
geom_point(aes(x = X7,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X7, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X7") +
ylab("Y") +
xlab("X7") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot7)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X8
y.bar <- mean(Y)
interactive.plot8 <- ggplot(dt) +
geom_point(aes(x = X8,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X8, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X8") +
ylab("Y") +
xlab("X8") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot8)
## `geom_smooth()` using formula = 'y ~ x'
# Menggabungkan scatter plot ke dalam satu layout
grid.arrange(interactive.plot5, interactive.plot6, interactive.plot7, interactive.plot8, nrow = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Matriks Korelasi ## Heatmap
data_corrl <- round(cor(dt), 1)
data_corrl <- cor(data_corrl)
ggcorrplot(data_corrl)
library(ggcorrplot)
reduced_data <- dt[, !names(dt) %in% c("Y")]
# Compute correlation at 2 decimal places
corr_matrix = round(cor(reduced_data), 2)
# Compute and show the result
ggcorrplot(corr_matrix, hc.order = TRUE, type = "lower",
lab = TRUE)
# Compute correlation at 2 decimal places
corr_matrixY = round(cor(dt), 2)
# Compute and show the result
ggcorrplot(corr_matrixY, hc.order = TRUE, type = "lower",
lab = TRUE)
## Menggunakan chart.Correlation()
library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 4.3.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.3.2
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
chart.Correlation(dt,
histogram = TRUE, # tambahkan histogram pada diagonal
pch = 1) # jenis point = 1
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
model1 = lm(formula = Y ~., data = dt)
summary(model1)
##
## Call:
## lm(formula = Y ~ ., data = dt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.841 -20.235 2.806 12.658 101.894
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -71.90966 745.09737 -0.097 0.924182
## X1 -2.98854 2.00210 -1.493 0.152836
## X2 -0.41374 0.72270 -0.572 0.574066
## X3 -2.29776 11.02580 -0.208 0.837258
## X4 -1.33650 11.86676 -0.113 0.911574
## X5 6.13531 7.60345 0.807 0.430251
## X6 0.08510 0.02153 3.952 0.000935 ***
## X7 2.48000 3.08720 0.803 0.432272
## X8 5.99254 5.54857 1.080 0.294395
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 43.95 on 18 degrees of freedom
## Multiple R-squared: 0.6986, Adjusted R-squared: 0.5647
## F-statistic: 5.215 on 8 and 18 DF, p-value: 0.001755
# Residual vs Y Duga
plot(model1,1)
# Residual vs Urutan
plot(x = 1:dim(dt)[1] ,
y = model1$residuals,
type ='b',
ylab ="Residuals",
xlab ="Observations")
# Normalitas QQ Plot
plot(model1,2)
## Formal
#NH Sisaan = 0
t.test(model1$residuals,
mu = 0,
conf.level = 0.95)
##
## One Sample t-test
##
## data: model1$residuals
## t = 5.8559e-17, df = 26, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -14.46519 14.46519
## sample estimates:
## mean of x
## 4.120932e-16
#Ragam Sisaan Homogen
ncvTest(model1)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 17.95736, Df = 1, p = 2.2591e-05
bptest(model1)
##
## studentized Breusch-Pagan test
##
## data: model1
## BP = 14.977, df = 8, p-value = 0.0596
abs_res <- abs(model1$residuals)
glejser <- lm(abs_res~X1+X2+X3+X4+X5+X6+X7+X8, data = dt)
summary(glejser)
##
## Call:
## lm(formula = abs_res ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8,
## data = dt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.084 -9.235 -1.629 8.107 32.047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -30.791997 325.757732 -0.095 0.92574
## X1 -1.453469 0.875322 -1.660 0.11413
## X2 0.150827 0.315964 0.477 0.63885
## X3 -1.779648 4.820498 -0.369 0.71630
## X4 2.724337 5.188168 0.525 0.60592
## X5 0.398705 3.324238 0.120 0.90586
## X6 0.029487 0.009415 3.132 0.00576 **
## X7 1.548663 1.349729 1.147 0.26624
## X8 3.237939 2.425845 1.335 0.19859
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.21 on 18 degrees of freedom
## Multiple R-squared: 0.6061, Adjusted R-squared: 0.431
## F-statistic: 3.462 on 8 and 18 DF, p-value: 0.01364
#Sisaan saling bebas
runs.test(model1$residuals)
##
## Runs Test
##
## data: model1$residuals
## statistic = -0.80064, runs = 12, n1 = 13, n2 = 13, n = 26, p-value =
## 0.4233
## alternative hypothesis: nonrandomness
dwtest(model1)
##
## Durbin-Watson test
##
## data: model1
## DW = 1.2481, p-value = 0.00651
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(model1)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: model1
## LM test = 3.8059, df = 1, p-value = 0.05107
#Sisaan Normal
shapiro.test(model1$residuals)
##
## Shapiro-Wilk normality test
##
## data: model1$residuals
## W = 0.9389, p-value = 0.1144
ad.test(model1$residuals)
##
## Anderson-Darling normality test
##
## data: model1$residuals
## A = 0.62209, p-value = 0.09444
lillie.test(model1$residuals)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: model1$residuals
## D = 0.14608, p-value = 0.1465
vif(model1)
## X1 X2 X3 X4 X5 X6 X7 X8
## 2.702995 1.574271 3.190659 3.995917 4.259429 2.619660 1.597939 2.253778
tidak ada multikolinearitas
ols_plot_resid_lev(model1)
index <- c(1:27)
ri <- studres(model1)
ci <- cooks.distance(model1)
DFFITSi <- dffits(model1)
hi <- ols_hadi(model1)
hii <- hatvalues(model1)
hasil <- data.frame(index,ri,ci,DFFITSi,hi,hii); round(hasil,4)
## index ri ci DFFITSi hadi potential residual hii
## 1 1 -1.1178 0.1583 -1.2019 1.7925 1.1561 0.6364 0.5362
## 2 2 -0.8078 0.0116 -0.3204 0.5010 0.1573 0.3437 0.1359
## 3 3 -1.5423 0.1108 -1.0361 1.6581 0.4514 1.2068 0.3110
## 4 4 0.4687 0.0181 0.3944 0.8237 0.7080 0.1157 0.4145
## 5 5 0.8080 0.1328 1.0828 2.1331 1.7958 0.3373 0.6423
## 6 6 0.2883 0.0049 0.2047 0.5480 0.5041 0.0439 0.3351
## 7 7 0.0728 0.0002 0.0448 0.3807 0.3779 0.0028 0.2742
## 8 8 -0.0646 0.0003 -0.0537 0.6927 0.6904 0.0022 0.4084
## 9 9 -0.5088 0.0079 -0.2616 0.4010 0.2644 0.1366 0.2091
## 10 10 0.1681 0.0011 0.0963 0.3429 0.3280 0.0150 0.2470
## 11 11 0.1027 0.0001 0.0323 0.1046 0.0990 0.0056 0.0901
## 12 12 0.0393 0.0000 0.0187 0.2277 0.2268 0.0008 0.1849
## 13 13 -1.1816 0.0495 -0.6745 1.0503 0.3258 0.7245 0.2458
## 14 14 0.1483 0.0012 0.1009 0.4748 0.4632 0.0116 0.3166
## 15 15 1.9938 0.0795 0.9132 2.2323 0.2098 2.0225 0.1734
## 16 16 4.1361 0.6816 3.4094 7.1158 0.6795 6.4363 0.4046
## 17 17 -0.2527 0.0020 -0.1297 0.2971 0.2633 0.0338 0.2084
## 18 18 0.1932 0.0101 0.2934 2.3255 2.3058 0.0197 0.6975
## 19 19 -0.5056 0.0079 -0.2614 0.4022 0.2673 0.1349 0.2109
## 20 20 -0.1907 0.0026 -0.1481 0.6218 0.6026 0.0192 0.3760
## 21 21 2.7109 1.9076 4.8192 6.0891 3.1603 2.9288 0.7596
## 22 22 0.3990 0.0091 0.2795 0.5746 0.4906 0.0840 0.3291
## 23 23 -1.8703 0.1403 -1.1991 2.1582 0.4111 1.7471 0.2913
## 24 24 -1.3005 0.0860 -0.8966 1.3429 0.4753 0.8676 0.3222
## 25 25 -0.3292 0.0085 -0.2692 0.7262 0.6690 0.0572 0.4008
## 26 26 0.2254 0.0014 0.1092 0.2617 0.2349 0.0269 0.1902
## 27 27 0.6307 0.0182 0.3979 0.6073 0.3981 0.2092 0.2847
for (i in 1:dim(hasil)[1]){
absri <- abs(hasil$ri)
pencilan <- which(absri > 2)
}
pencilan
## [1] 16 21
titik_leverage <- vector("list", dim(hasil)[1])
for (i in 1:dim(hasil)[1]) {
cutoff <- 2 * 9 / 27
titik_leverage[[i]] <- which(hii > cutoff)
}
leverages <- unlist(titik_leverage)
titik_leverage <- sort(unique(leverages))
titik_leverage
## [1] 18 21
amatan_berpengaruh <- vector("list", dim(hasil)[1])
for (i in 1:dim(hasil)[1]) {
cutoff <- 2 * sqrt((9 / 27))
amatan_berpengaruh[[i]] <- which(abs(DFFITSi) > cutoff)
}
berpengaruh <- unlist(amatan_berpengaruh)
amatan_berpengaruh <- sort(unique(berpengaruh))
amatan_berpengaruh
## [1] 1 16 21 23
data1 <- as.data.frame(dt[-c(18),])
model2 <- lm(Y~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data=data1)
summary(model2)
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.213 -20.843 3.822 14.534 102.716
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.99785 856.09164 0.002 0.99817
## X1 -2.94117 2.07244 -1.419 0.17392
## X2 -0.44319 0.75831 -0.584 0.56660
## X3 -2.74495 11.56700 -0.237 0.81525
## X4 -1.98067 12.64496 -0.157 0.87738
## X5 6.65089 8.25841 0.805 0.43174
## X6 0.08496 0.02215 3.836 0.00132 **
## X7 1.84924 4.55295 0.406 0.68969
## X8 6.23157 5.83584 1.068 0.30054
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 45.17 on 17 degrees of freedom
## Multiple R-squared: 0.692, Adjusted R-squared: 0.5471
## F-statistic: 4.775 on 8 and 17 DF, p-value: 0.003281
Karena Adj-R di Model 1 lebih besar dibanding model 2, kita pakai model 1, yaitu tidak menghilangkan pencilan, leverage, dan amatan berpengaruh
Untuk ragam tidak homogen
wls=1/lm(abs(model1$residuals) ~ model1$fitted.values)$fitted.values^2
model_wls <- lm(Y~., data = dt,weights = wls)
summary(model_wls)
##
## Call:
## lm(formula = Y ~ ., data = dt, weights = wls)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -1.20192 -0.41816 0.02258 0.17761 2.29624
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 93.92699 274.15161 0.343 0.735860
## X1 -0.97106 0.84815 -1.145 0.267236
## X2 -0.06457 0.25439 -0.254 0.802523
## X3 -3.49212 4.41201 -0.792 0.438957
## X4 -1.01054 6.18365 -0.163 0.872008
## X5 7.90444 5.73419 1.378 0.184950
## X6 0.05972 0.01340 4.458 0.000304 ***
## X7 1.06767 0.82146 1.300 0.210097
## X8 2.54292 2.46956 1.030 0.316786
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.01 on 18 degrees of freedom
## Multiple R-squared: 0.8279, Adjusted R-squared: 0.7514
## F-statistic: 10.82 on 8 and 18 DF, p-value: 1.762e-05
# Residual vs Y Duga
plot(model_wls,1)
# Residual vs Urutan
plot(x = 1:27,
y = model_wls$residuals,
type ='b',
ylab ="Residuals",
xlab ="Observations")
# Normalitas QQ Plot
plot(model_wls,2)
#NH Sisaan = 0
t.test(model_wls$residuals,
mu = 0,
conf.level = 0.95)
##
## One Sample t-test
##
## data: model_wls$residuals
## t = 0.54294, df = 26, p-value = 0.5918
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -11.59390 19.91711
## sample estimates:
## mean of x
## 4.161602
#Ragam Sisaan Homogen
ncvTest(model_wls)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 13.19452, Df = 1, p = 0.00028077
bptest(model_wls)
##
## studentized Breusch-Pagan test
##
## data: model_wls
## BP = 0.0024473, df = 8, p-value = 1
abs_res <- abs(model_wls$residuals)
glejser <- lm(abs_res~X1+X2+X3+X4+X5+X6+X7+X8, data = dt, weights = wls)
summary(glejser)
##
## Call:
## lm(formula = abs_res ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8,
## data = dt, weights = wls)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -0.80290 -0.31364 0.00205 0.16100 1.59426
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.983241 172.326302 0.220 0.8280
## X1 -0.701495 0.533128 -1.316 0.2048
## X2 -0.126456 0.159905 -0.791 0.4393
## X3 -1.315696 2.773305 -0.474 0.6409
## X4 -2.689407 3.886922 -0.692 0.4978
## X5 6.210792 3.604400 1.723 0.1020
## X6 0.022872 0.008421 2.716 0.0142 *
## X7 0.302536 0.516356 0.586 0.5652
## X8 0.322247 1.552316 0.208 0.8379
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.635 on 18 degrees of freedom
## Multiple R-squared: 0.5915, Adjusted R-squared: 0.41
## F-statistic: 3.258 on 8 and 18 DF, p-value: 0.0178
#Sisaan Normal
shapiro.test(model_wls$residuals)
##
## Shapiro-Wilk normality test
##
## data: model_wls$residuals
## W = 0.72863, p-value = 9.96e-06
#Sisaan saling bebas
runs.test(model_wls$residuals)
##
## Runs Test
##
## data: model_wls$residuals
## statistic = 0, runs = 14, n1 = 13, n2 = 13, n = 26, p-value = 1
## alternative hypothesis: nonrandomness
library(MASS)
databc <- dt
bc <- boxcox(Y~., data = databc)
lambda <- bc$x[which.max(bc$y)]
lambda
## [1] 0.1010101
databc$Y <- ((databc$Y^lambda-1)/lambda)
modelBoxcox <- lm(Y~., data = databc)
summary(modelBoxcox)
##
## Call:
## lm(formula = Y ~ ., data = databc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1051 -0.5105 0.1200 0.4684 1.2882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.0503924 12.4813394 -0.485 0.633697
## X1 -0.0163050 0.0335378 -0.486 0.632715
## X2 -0.0049119 0.0121061 -0.406 0.689718
## X3 0.0612487 0.1846964 0.332 0.744007
## X4 -0.2638681 0.1987836 -1.327 0.200963
## X5 0.2179536 0.1273675 1.711 0.104218
## X6 0.0016057 0.0003607 4.451 0.000308 ***
## X7 0.0533624 0.0517146 1.032 0.315802
## X8 0.1688033 0.0929458 1.816 0.086041 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7362 on 18 degrees of freedom
## Multiple R-squared: 0.8194, Adjusted R-squared: 0.7392
## F-statistic: 10.21 on 8 and 18 DF, p-value: 2.639e-05
step(modelBoxcox)
## Start: AIC=-9.49
## Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8
##
## Df Sum of Sq RSS AIC
## - X3 1 0.0596 9.8148 -11.3226
## - X2 1 0.0892 9.8444 -11.2413
## - X1 1 0.1281 9.8833 -11.1349
## - X7 1 0.5770 10.3322 -9.9354
## <none> 9.7552 -9.4871
## - X4 1 0.9549 10.7101 -8.9655
## - X5 1 1.5870 11.3421 -7.4174
## - X8 1 1.7876 11.5427 -6.9441
## - X6 1 10.7384 20.4935 8.5554
##
## Step: AIC=-11.32
## Y ~ X1 + X2 + X4 + X5 + X6 + X7 + X8
##
## Df Sum of Sq RSS AIC
## - X2 1 0.0679 9.8826 -13.1366
## - X1 1 0.1186 9.9334 -12.9982
## - X7 1 0.6075 10.4223 -11.7010
## <none> 9.8148 -11.3226
## - X4 1 0.9282 10.7429 -10.8829
## - X5 1 1.8316 11.6464 -8.7028
## - X8 1 2.2186 12.0334 -7.8201
## - X6 1 10.7205 20.5353 6.6103
##
## Step: AIC=-13.14
## Y ~ X1 + X4 + X5 + X6 + X7 + X8
##
## Df Sum of Sq RSS AIC
## - X1 1 0.0672 9.9498 -14.9535
## - X7 1 0.7144 10.5970 -13.2522
## <none> 9.8826 -13.1366
## - X4 1 0.8755 10.7581 -12.8447
## - X5 1 1.7637 11.6464 -10.7028
## - X8 1 2.6526 12.5352 -8.7170
## - X6 1 11.1750 21.0577 5.2886
##
## Step: AIC=-14.95
## Y ~ X4 + X5 + X6 + X7 + X8
##
## Df Sum of Sq RSS AIC
## - X7 1 0.7496 10.6995 -14.9923
## <none> 9.9498 -14.9535
## - X4 1 1.1290 11.0789 -14.0515
## - X8 1 2.7505 12.7004 -10.3636
## - X5 1 2.9501 12.9000 -9.9425
## - X6 1 19.1250 29.0748 11.9990
##
## Step: AIC=-14.99
## Y ~ X4 + X5 + X6 + X8
##
## Df Sum of Sq RSS AIC
## <none> 10.700 -14.9923
## - X4 1 1.4532 12.153 -13.5537
## - X8 1 2.0013 12.701 -12.3627
## - X5 1 3.3496 14.049 -9.6386
## - X6 1 19.6101 30.310 11.1219
##
## Call:
## lm(formula = Y ~ X4 + X5 + X6 + X8, data = databc)
##
## Coefficients:
## (Intercept) X4 X5 X6 X8
## 1.4977 -0.2760 0.2558 0.0015 0.1392
modelBCbest <- lm(formula = Y ~ X4 + X5 + X6 + X8, data = databc)
summary(modelBCbest)
##
## Call:
## lm(formula = Y ~ X4 + X5 + X6 + X8, data = databc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4545 -0.5231 0.0980 0.3671 1.2845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.4977328 0.8749287 1.712 0.1010
## X4 -0.2759667 0.1596470 -1.729 0.0979 .
## X5 0.2557720 0.0974604 2.624 0.0155 *
## X6 0.0014996 0.0002362 6.350 2.17e-06 ***
## X8 0.1392436 0.0686416 2.029 0.0548 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6974 on 22 degrees of freedom
## Multiple R-squared: 0.802, Adjusted R-squared: 0.766
## F-statistic: 22.27 on 4 and 22 DF, p-value: 1.805e-07
# Residual vs Y Duga
plot(modelBCbest,1)
# Residual vs Urutan
plot(x = 1:27,
y = modelBCbest$residuals,
type ='b',
ylab ="Residuals",
xlab ="Observations")
# Normalitas QQ Plot
plot(modelBCbest,2)
#NH Sisaan = 0
t.test(modelBCbest$residuals,
mu = 0,
conf.level = 0.95)
##
## One Sample t-test
##
## data: modelBCbest$residuals
## t = 5.8482e-17, df = 26, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2537677 0.2537677
## sample estimates:
## mean of x
## 7.219983e-18
#Ragam Sisaan Homogen
ncvTest(modelBCbest)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 2.8009, Df = 1, p = 0.094211
bptest(modelBCbest)
##
## studentized Breusch-Pagan test
##
## data: modelBCbest
## BP = 3.7368, df = 4, p-value = 0.4428
abs_res <- abs(modelBCbest$residuals)
glejser <- lm(abs_res~X1+X2+X3+X4+X5+X6+X7+X8, data = databc)
summary(modelBCbest)
##
## Call:
## lm(formula = Y ~ X4 + X5 + X6 + X8, data = databc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4545 -0.5231 0.0980 0.3671 1.2845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.4977328 0.8749287 1.712 0.1010
## X4 -0.2759667 0.1596470 -1.729 0.0979 .
## X5 0.2557720 0.0974604 2.624 0.0155 *
## X6 0.0014996 0.0002362 6.350 2.17e-06 ***
## X8 0.1392436 0.0686416 2.029 0.0548 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6974 on 22 degrees of freedom
## Multiple R-squared: 0.802, Adjusted R-squared: 0.766
## F-statistic: 22.27 on 4 and 22 DF, p-value: 1.805e-07
#Sisaan Normal
shapiro.test(modelBCbest$residuals)
##
## Shapiro-Wilk normality test
##
## data: modelBCbest$residuals
## W = 0.97099, p-value = 0.6281
#Sisaan saling bebas
runs.test(modelBCbest$residuals)
##
## Runs Test
##
## data: modelBCbest$residuals
## statistic = 1.201, runs = 17, n1 = 13, n2 = 13, n = 26, p-value =
## 0.2298
## alternative hypothesis: nonrandomness
dwtest(modelBCbest)
##
## Durbin-Watson test
##
## data: modelBCbest
## DW = 1.4581, p-value = 0.037
## alternative hypothesis: true autocorrelation is greater than 0
library(rcompanion)
## Warning: package 'rcompanion' was built under R version 4.3.3
plotNormalHistogram(dt$Y, breaks = 10)
plotNormalHistogram(databc$Y, breaks = 10)
hist(dt$Y)
hist(databc$Y)