library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(randtests)
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.3.3
library(GGally)
## Warning: package 'GGally' was built under R version 4.3.2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(olsrr)
## Warning: package 'olsrr' was built under R version 4.3.2
##
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
##
## rivers
library(nortest)
library(MASS)
## Warning: package 'MASS' was built under R version 4.3.3
##
## Attaching package: 'MASS'
## The following object is masked from 'package:olsrr':
##
## cement
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: lattice
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ✖ car::recode() masks dplyr::recode()
## ✖ MASS::select() masks plotly::select(), dplyr::select()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(readxl)
data_pdrb <- read_xlsx("C:/Users/Alista/Documents/Semester 4/ANREG/Data ANREG Kelompok 7 UAS.xlsx")
## New names:
## • `` -> `...17`
## • `` -> `...18`
Y <- log(data_pdrb$`PDRB Awal`)
X0 <- rep(1,27)
X1 <- log(data_pdrb$`Balita Stunting`)
X2 <- data_pdrb$`Akses Sanitasi Layak`
X3 <- data_pdrb$`Usia Harapan Hidup`
X4 <- data_pdrb$`Rata-rata Lama Sekolah`
X5 <- log(data_pdrb$`Pengeluaran per Kapita`)
X6 <- log(data_pdrb$`Jumlah Angkatan Kerja`)
X7 <- data_pdrb$`Partisipasi Angkatan Kerja`
X8 <- data_pdrb$`Tingkat Pengangguran Terbuka`
dt<-data.frame(Y,X1,X2,X3,X4,X5,X6,X7,X8)
dt
## Y X1 X2 X3 X4 X5 X6 X7 X8
## 1 12.031518 9.834459 73.47 71.65 8.34 9.292842 14.87930 63.75 10.64
## 2 10.827523 9.233178 63.32 71.54 7.11 9.128045 14.08851 69.11 7.77
## 3 10.450348 8.835065 61.76 70.58 7.20 9.017241 14.01648 69.98 8.41
## 4 11.390057 9.953134 70.85 74.01 9.08 9.267477 14.40817 63.64 6.98
## 5 10.645733 10.317780 51.90 71.85 7.83 9.015177 14.10095 68.84 7.60
## 6 10.179666 9.555489 54.39 69.95 7.73 9.009081 13.75439 67.83 4.17
## 7 10.082424 7.534763 75.50 72.30 8.00 9.151439 13.40682 68.47 3.75
## 8 9.822838 8.475954 69.29 74.03 7.88 9.171600 13.18219 61.80 9.81
## 9 10.477958 9.271906 86.76 72.47 7.40 9.286468 13.92035 65.53 8.11
## 10 10.098263 8.027150 81.26 70.76 7.49 9.205328 13.37565 66.21 4.16
## 11 10.151981 8.750841 80.59 72.91 8.72 9.285077 13.32035 64.63 7.72
## 12 11.022876 8.241967 86.64 72.15 6.83 9.226804 13.76720 69.08 6.49
## 13 10.314987 7.080026 80.02 72.92 7.20 9.332027 13.67410 68.87 7.77
## 14 10.805545 7.271009 82.46 71.47 8.11 9.408617 13.06489 65.21 8.75
## 15 12.086562 7.925880 85.10 72.62 7.96 9.386560 13.99445 65.51 9.87
## 16 12.487979 8.268475 89.99 74.04 9.53 9.372204 14.51191 65.41 10.31
## 17 10.416110 9.138200 50.96 72.79 8.22 9.109857 13.61652 64.37 9.63
## 18 9.039125 6.251904 83.00 71.89 8.03 9.147294 12.47136 79.92 1.56
## 19 10.470472 7.463363 67.93 74.13 10.63 9.397484 13.22950 64.21 10.78
## 20 9.140364 6.692084 45.80 72.85 10.14 9.326255 11.98054 62.48 8.83
## 21 12.260795 8.796944 49.85 74.75 11.00 9.777868 14.17712 69.42 9.55
## 22 9.799806 7.775276 90.62 72.74 10.33 9.399886 12.00542 65.42 8.42
## 23 11.201779 8.428362 90.80 75.48 11.44 9.695171 14.28084 65.33 8.81
## 24 10.869805 8.198914 96.21 74.92 11.47 9.675708 14.04562 63.35 7.82
## 25 10.112643 8.018296 77.18 74.50 11.21 9.433484 12.67787 67.22 10.77
## 26 9.728005 8.636397 52.62 72.63 9.53 9.266532 12.75726 65.99 6.62
## 27 8.162302 6.740519 88.26 71.49 8.78 9.302646 11.45985 63.76 5.53
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
#Plot Y vs X1
y.bar <- mean(Y)
interactive.plot1 <- ggplot(dt) +
geom_point(aes(x = X1,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X1, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X1") +
ylab("Y") +
xlab("X1") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot1)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X2
y.bar <- mean(Y)
interactive.plot2 <- ggplot(dt) +
geom_point(aes(x = X2,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X2, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X2") +
ylab("Y") +
xlab("X2") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot2)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X3
y.bar <- mean(Y)
interactive.plot3 <- ggplot(dt) +
geom_point(aes(x = X3,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X3, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y") +
ylab("Y") +
xlab("X3") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot3)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X4
y.bar <- mean(Y)
interactive.plot4 <- ggplot(dt) +
geom_point(aes(x = X4,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X4, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X4") +
ylab("Y") +
xlab("X4") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot4)
## `geom_smooth()` using formula = 'y ~ x'
# Menggabungkan scatter plot ke dalam satu layout
grid.arrange(interactive.plot1, interactive.plot2, interactive.plot3, interactive.plot4, nrow = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X5
y.bar <- mean(Y)
interactive.plot5 <- ggplot(dt) +
geom_point(aes(x = X5,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X5, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X5") +
ylab("Y") +
xlab("X5") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot5)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X6
y.bar <- mean(Y)
interactive.plot6 <- ggplot(dt) +
geom_point(aes(x = X6,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X6, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X6") +
ylab("Y") +
xlab("X6") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot6)
## `geom_smooth()` using formula = 'y ~ x'
##Plot Y vs X7
y.bar <- mean(Y)
interactive.plot7 <- ggplot(dt) +
geom_point(aes(x = X7,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X7, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X7") +
ylab("Y") +
xlab("X7") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot7)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X8
y.bar <- mean(Y)
interactive.plot8 <- ggplot(dt) +
geom_point(aes(x = X8,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X8, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Y vs X8") +
ylab("Y") +
xlab("X8") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot8)
## `geom_smooth()` using formula = 'y ~ x'
# Menggabungkan scatter plot ke dalam satu layout
grid.arrange(interactive.plot5, interactive.plot6, interactive.plot7, interactive.plot8, nrow = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
# Matriks Korelasi ## Heatmap
data_corrl <- round(cor(dt), 1)
data_corrl <- cor(data_corrl)
ggcorrplot(data_corrl)
library(ggcorrplot)
reduced_data <- dt[, !names(dt) %in% c("Y")]
# Compute correlation at 2 decimal places
corr_matrix = round(cor(reduced_data), 2)
# Compute and show the result
ggcorrplot(corr_matrix, hc.order = TRUE, type = "lower",
lab = TRUE)
# Compute correlation at 2 decimal places
corr_matrixY = round(cor(dt), 2)
# Compute and show the result
ggcorrplot(corr_matrixY, hc.order = TRUE, type = "lower",
lab = TRUE)
## Menggunakan chart.Correlation()
library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 4.3.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.3.2
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
chart.Correlation(dt,
histogram = TRUE, # tambahkan histogram pada diagonal
pch = 1) # jenis point = 1
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter
model1 = lm(formula = Y ~., data = dt)
summary(model1)
##
## Call:
## lm(formula = Y ~ ., data = dt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52698 -0.16510 -0.02792 0.24233 0.69871
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.006545 9.446740 -1.165 0.2592
## X1 -0.048126 0.148858 -0.323 0.7502
## X2 0.001046 0.006849 0.153 0.8803
## X3 -0.123339 0.108760 -1.134 0.2717
## X4 0.009809 0.125327 0.078 0.9385
## X5 1.589795 0.920848 1.726 0.1014
## X6 0.972740 0.187238 5.195 6.1e-05 ***
## X7 0.027516 0.030977 0.888 0.3861
## X8 0.128233 0.051260 2.502 0.0222 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4171 on 18 degrees of freedom
## Multiple R-squared: 0.8775, Adjusted R-squared: 0.8231
## F-statistic: 16.12 on 8 and 18 DF, p-value: 9.609e-07
# Residual vs Y Duga
plot(model1,1)
# Residual vs Urutan
plot(x = 1:dim(dt)[1] ,
y = model1$residuals,
type ='b',
ylab ="Residuals",
xlab ="Observations")
# Normalitas QQ Plot
plot(model1,2)
## Non-formal
#NH Sisaan = 0
t.test(model1$residuals,
mu = 0,
conf.level = 0.95)
##
## One Sample t-test
##
## data: model1$residuals
## t = 8.4734e-17, df = 26, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1372784 0.1372784
## sample estimates:
## mean of x
## 5.658933e-18
#Ragam Sisaan Homogen
ncvTest(model1)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 4.404522, Df = 1, p = 0.035844
bptest(model1)
##
## studentized Breusch-Pagan test
##
## data: model1
## BP = 13.897, df = 8, p-value = 0.08449
abs_res <- abs(model1$residuals)
glejser <- lm(abs_res~X1+X2+X3+X4+X5+X6+X7+X8, data = dt)
summary(glejser)
##
## Call:
## lm(formula = abs_res ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8,
## data = dt)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.240877 -0.114018 0.003462 0.103582 0.314787
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.049875 3.908115 -0.780 0.4453
## X1 -0.044751 0.061583 -0.727 0.4768
## X2 0.006353 0.002833 2.242 0.0378 *
## X3 0.003699 0.044994 0.082 0.9354
## X4 -0.019582 0.051848 -0.378 0.7101
## X5 0.175127 0.380955 0.460 0.6512
## X6 0.115113 0.077460 1.486 0.1546
## X7 -0.002620 0.012815 -0.204 0.8403
## X8 0.015526 0.021206 0.732 0.4735
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1725 on 18 degrees of freedom
## Multiple R-squared: 0.5379, Adjusted R-squared: 0.3326
## F-statistic: 2.619 on 8 and 18 DF, p-value: 0.04278
#Sisaan saling bebas
runs.test(model1$residuals)
##
## Runs Test
##
## data: model1$residuals
## statistic = 0.40032, runs = 15, n1 = 13, n2 = 13, n = 26, p-value =
## 0.6889
## alternative hypothesis: nonrandomness
dwtest(model1)
##
## Durbin-Watson test
##
## data: model1
## DW = 1.8214, p-value = 0.1779
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(model1)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: model1
## LM test = 0.17605, df = 1, p-value = 0.6748
#Sisaan Normal
shapiro.test(model1$residuals)
##
## Shapiro-Wilk normality test
##
## data: model1$residuals
## W = 0.9563, p-value = 0.3034
vif(model1)
## X1 X2 X3 X4 X5 X6 X7 X8
## 3.536830 1.569843 3.447035 4.948653 4.774637 3.647674 1.786338 2.135749
tidak ada multikolinearitas
ols_plot_resid_lev(model1)
index <- c(1:27)
ri <- studres(model1)
ci <- cooks.distance(model1)
DFFITSi <- dffits(model1)
hi <- ols_hadi(model1)
hii <- hatvalues(model1)
hasil <- data.frame(index,ri,ci,DFFITSi,hi,hii); round(hasil,4)
## index ri ci DFFITSi hadi potential residual hii
## 1 1 -0.5071 0.0151 -0.3604 0.6407 0.5052 0.1354 0.3357
## 2 2 -0.3746 0.0028 -0.1537 0.2425 0.1683 0.0742 0.1441
## 3 3 -1.5705 0.1157 -1.0611 1.7055 0.4565 1.2489 0.3134
## 4 4 1.3289 0.0909 0.9236 1.3873 0.4830 0.9043 0.3257
## 5 5 -0.1052 0.0005 -0.0674 0.4159 0.4100 0.0059 0.2908
## 6 6 0.1129 0.0010 0.0911 0.6572 0.6505 0.0067 0.3941
## 7 7 0.7196 0.0241 0.4599 0.6801 0.4084 0.2717 0.2900
## 8 8 -0.4496 0.0182 -0.3957 0.8809 0.7745 0.1065 0.4364
## 9 9 -1.2447 0.0567 -0.7254 1.1414 0.3397 0.8017 0.2535
## 10 10 0.1600 0.0010 0.0940 0.3583 0.3447 0.0136 0.2564
## 11 11 -0.2085 0.0006 -0.0718 0.1416 0.1186 0.0230 0.1060
## 12 12 0.9972 0.0265 0.4879 0.7600 0.2394 0.5206 0.1932
## 13 13 -1.5908 0.1194 -1.0799 1.7405 0.4609 1.2796 0.3155
## 14 14 0.6366 0.0186 0.4019 0.6117 0.3986 0.2131 0.2850
## 15 15 1.8791 0.0808 0.9105 2.0331 0.2348 1.7983 0.1901
## 16 16 2.2676 0.2273 1.5864 2.9656 0.4894 2.4762 0.3286
## 17 17 -0.0729 0.0002 -0.0369 0.2589 0.2561 0.0028 0.2039
## 18 18 -0.1102 0.0036 -0.1760 2.5571 2.5506 0.0064 0.7184
## 19 19 -0.4242 0.0080 -0.2615 0.4751 0.3801 0.0950 0.2754
## 20 20 -0.0854 0.0009 -0.0883 1.0739 1.0700 0.0039 0.5169
## 21 21 1.4372 0.7211 2.6218 4.3280 3.3279 1.0001 0.7689
## 22 22 1.4612 0.1548 1.2168 1.7685 0.6935 1.0750 0.4095
## 23 23 -1.5248 0.1000 -0.9827 1.5988 0.4154 1.1834 0.2935
## 24 24 -1.5354 0.1389 -1.1595 1.7585 0.5704 1.1881 0.3632
## 25 25 -0.1711 0.0022 -0.1376 0.6620 0.6465 0.0155 0.3927
## 26 26 0.4039 0.0050 0.2061 0.3466 0.2604 0.0862 0.2066
## 27 27 -0.8169 0.0488 -0.6566 0.9940 0.6461 0.3479 0.3925
for (i in 1:dim(hasil)[1]){
absri <- abs(hasil$ri)
pencilan <- which(absri > 2)
}
pencilan
## [1] 16
titik_leverage <- vector("list", dim(hasil)[1])
for (i in 1:dim(hasil)[1]) {
cutoff <- 2 * 9 / 27
titik_leverage[[i]] <- which(hii > cutoff)
}
leverages <- unlist(titik_leverage)
titik_leverage <- sort(unique(leverages))
titik_leverage
## [1] 18 21
amatan_berpengaruh <- vector("list", dim(hasil)[1])
for (i in 1:dim(hasil)[1]) {
cutoff <- 2 * sqrt((9 / 27))
amatan_berpengaruh[[i]] <- which(abs(DFFITSi) > cutoff)
}
berpengaruh <- unlist(amatan_berpengaruh)
amatan_berpengaruh <- sort(unique(berpengaruh))
amatan_berpengaruh
## [1] 16 21 22 24
data1 <- as.data.frame(dt[-c(18),])
model2 <- lm(Y~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data=data1)
summary(model2)
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data = data1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53912 -0.15670 -0.03189 0.23697 0.69629
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.136621 9.788600 -1.138 0.2710
## X1 -0.051500 0.156151 -0.330 0.7456
## X2 0.001193 0.007170 0.166 0.8698
## X3 -0.120493 0.114818 -1.049 0.3087
## X4 0.012186 0.130707 0.093 0.9268
## X5 1.559298 0.986821 1.580 0.1325
## X6 0.972004 0.192714 5.044 1e-04 ***
## X7 0.030881 0.044134 0.700 0.4936
## X8 0.127287 0.053422 2.383 0.0291 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.429 on 17 degrees of freedom
## Multiple R-squared: 0.8656, Adjusted R-squared: 0.8024
## F-statistic: 13.69 on 8 and 17 DF, p-value: 5.075e-06
Karena Adj-R di Model 1 lebih besar dibanding model 2, kita pakai model 1, yaitu tidak menghilangkan pencilan, leverage, dan amatan berpengaruh
Untuk ragam tidak homogen
wls=1/lm(abs(model1$residuals) ~ model1$fitted.values)$fitted.values^2
model_wls <- lm(Y~., data = dt,weights = wls)
summary(model_wls)
##
## Call:
## lm(formula = Y ~ ., data = dt, weights = wls)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -1.9315 -0.9051 -0.2439 0.9128 2.9316
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.976546 11.016601 -0.906 0.37711
## X1 -0.004678 0.125873 -0.037 0.97076
## X2 0.001908 0.005620 0.339 0.73819
## X3 -0.143382 0.101183 -1.417 0.17354
## X4 0.130155 0.129888 1.002 0.32960
## X5 1.238163 1.099460 1.126 0.27489
## X6 1.049734 0.162908 6.444 4.6e-06 ***
## X7 0.044201 0.021676 2.039 0.05639 .
## X8 0.140293 0.048038 2.920 0.00913 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.528 on 18 degrees of freedom
## Multiple R-squared: 0.9674, Adjusted R-squared: 0.953
## F-statistic: 66.86 on 8 and 18 DF, p-value: 8.274e-12
# Residual vs Y Duga
plot(model_wls,1)
# Residual vs Urutan
plot(x = 1:27,
y = model_wls$residuals,
type ='b',
ylab ="Residuals",
xlab ="Observations")
# Normalitas QQ Plot
plot(model_wls,2)
#NH Sisaan = 0
t.test(model_wls$residuals,
mu = 0,
conf.level = 0.95)
##
## One Sample t-test
##
## data: model_wls$residuals
## t = -0.37174, df = 26, p-value = 0.7131
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1737406 0.1205235
## sample estimates:
## mean of x
## -0.02660859
#Ragam Sisaan Homogen
ncvTest(model_wls)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.05611493, Df = 1, p = 0.81275
bptest(model_wls)
##
## studentized Breusch-Pagan test
##
## data: model_wls
## BP = 1330.3, df = 8, p-value < 2.2e-16
abs_res <- abs(model_wls$residuals)
glejser <- lm(abs_res~X1+X2+X3+X4+X5+X6+X7+X8, data = dt, weights = wls)
summary(glejser)
##
## Call:
## lm(formula = abs_res ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8,
## data = dt, weights = wls)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -0.73235 -0.40035 -0.01296 0.27397 0.91670
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.933208 4.335500 0.677 0.50729
## X1 -0.061931 0.049536 -1.250 0.22723
## X2 0.006848 0.002212 3.096 0.00623 **
## X3 -0.079241 0.039820 -1.990 0.06200 .
## X4 0.103423 0.051117 2.023 0.05815 .
## X5 -0.136358 0.432684 -0.315 0.75628
## X6 0.226556 0.064111 3.534 0.00237 **
## X7 0.002038 0.008530 0.239 0.81387
## X8 0.039717 0.018905 2.101 0.05000 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6015 on 18 degrees of freedom
## Multiple R-squared: 0.767, Adjusted R-squared: 0.6634
## F-statistic: 7.406 on 8 and 18 DF, p-value: 0.0002203
#Sisaan Normal
shapiro.test(model_wls$residuals)
##
## Shapiro-Wilk normality test
##
## data: model_wls$residuals
## W = 0.9832, p-value = 0.9264
#Sisaan saling bebas
runs.test(model_wls$residuals)
##
## Runs Test
##
## data: model_wls$residuals
## statistic = 0.80064, runs = 16, n1 = 13, n2 = 13, n = 26, p-value =
## 0.4233
## alternative hypothesis: nonrandomness
library(MASS)
databc <- dt
bc <- boxcox(Y~., data = databc)
lambda <- bc$x[which.max(bc$y)]
lambda
## [1] -0.1010101
databc$Y <- ((databc$Y^lambda-1)/lambda)
modelBoxcox <- lm(Y~., data = databc)
summary(modelBoxcox)
##
## Call:
## lm(formula = Y ~ ., data = databc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.03736 -0.01549 -0.00240 0.01863 0.04534
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.564e-01 6.897e-01 0.662 0.5165
## X1 -2.492e-03 1.087e-02 -0.229 0.8212
## X2 7.685e-05 5.001e-04 0.154 0.8796
## X3 -8.049e-03 7.941e-03 -1.014 0.3242
## X4 5.541e-04 9.151e-03 0.061 0.9524
## X5 1.091e-01 6.723e-02 1.623 0.1219
## X6 7.392e-02 1.367e-02 5.407 3.88e-05 ***
## X7 2.146e-03 2.262e-03 0.949 0.3552
## X8 9.639e-03 3.743e-03 2.575 0.0191 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03045 on 18 degrees of freedom
## Multiple R-squared: 0.8866, Adjusted R-squared: 0.8362
## F-statistic: 17.59 on 8 and 18 DF, p-value: 4.95e-07
step(modelBoxcox)
## Start: AIC=-181.49
## Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8
##
## Df Sum of Sq RSS AIC
## - X4 1 0.0000034 0.016695 -183.49
## - X2 1 0.0000219 0.016714 -183.46
## - X1 1 0.0000487 0.016741 -183.41
## - X7 1 0.0008350 0.017527 -182.18
## - X3 1 0.0009527 0.017645 -182.00
## <none> 0.016692 -181.49
## - X5 1 0.0024436 0.019136 -179.81
## - X8 1 0.0061508 0.022843 -175.02
## - X6 1 0.0271137 0.043806 -157.44
##
## Step: AIC=-183.49
## Y ~ X1 + X2 + X3 + X5 + X6 + X7 + X8
##
## Df Sum of Sq RSS AIC
## - X2 1 0.000019 0.016714 -185.46
## - X1 1 0.000046 0.016741 -185.41
## - X7 1 0.000836 0.017531 -184.17
## - X3 1 0.001194 0.017889 -183.62
## <none> 0.016695 -183.49
## - X5 1 0.004286 0.020981 -179.32
## - X8 1 0.006154 0.022849 -177.02
## - X6 1 0.035724 0.052419 -154.60
##
## Step: AIC=-185.46
## Y ~ X1 + X3 + X5 + X6 + X7 + X8
##
## Df Sum of Sq RSS AIC
## - X1 1 0.000083 0.016797 -187.32
## - X7 1 0.000821 0.017536 -186.16
## - X3 1 0.001202 0.017916 -185.58
## <none> 0.016714 -185.46
## - X5 1 0.004602 0.021316 -180.89
## - X8 1 0.006405 0.023119 -178.70
## - X6 1 0.040415 0.057129 -154.27
##
## Step: AIC=-187.32
## Y ~ X3 + X5 + X6 + X7 + X8
##
## Df Sum of Sq RSS AIC
## - X7 1 0.001115 0.017912 -187.59
## - X3 1 0.001237 0.018033 -187.41
## <none> 0.016797 -187.32
## - X5 1 0.006020 0.022817 -181.05
## - X8 1 0.006361 0.023158 -180.65
## - X6 1 0.082381 0.099178 -141.38
##
## Step: AIC=-187.59
## Y ~ X3 + X5 + X6 + X8
##
## Df Sum of Sq RSS AIC
## - X3 1 0.001248 0.019159 -187.77
## <none> 0.017912 -187.59
## - X8 1 0.005343 0.023254 -182.54
## - X5 1 0.005945 0.023856 -181.85
## - X6 1 0.088709 0.106621 -141.43
##
## Step: AIC=-187.77
## Y ~ X5 + X6 + X8
##
## Df Sum of Sq RSS AIC
## <none> 0.019159 -187.77
## - X8 1 0.004277 0.023436 -184.33
## - X5 1 0.005310 0.024469 -183.17
## - X6 1 0.087812 0.106971 -143.34
##
## Call:
## lm(formula = Y ~ X5 + X6 + X8, data = databc)
##
## Coefficients:
## (Intercept) X5 X6 X8
## 0.309642 0.080972 0.072598 0.006281
modelBCbest <- lm(formula = Y ~ X5 + X6 + X8, data = databc)
summary(modelBCbest)
##
## Call:
## lm(formula = Y ~ X5 + X6 + X8, data = databc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.042941 -0.015785 -0.003192 0.011683 0.055513
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.309642 0.309166 1.002 0.3270
## X5 0.080972 0.032072 2.525 0.0189 *
## X6 0.072598 0.007071 10.267 4.64e-10 ***
## X8 0.006281 0.002772 2.266 0.0332 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02886 on 23 degrees of freedom
## Multiple R-squared: 0.8698, Adjusted R-squared: 0.8528
## F-statistic: 51.22 on 3 and 23 DF, p-value: 2.435e-10
# Residual vs Y Duga
plot(modelBCbest,1)
# Residual vs Urutan
plot(x = 1:27,
y = modelBCbest$residuals,
type ='b',
ylab ="Residuals",
xlab ="Observations")
# Normalitas QQ Plot
plot(modelBCbest,2)
#NH Sisaan = 0
t.test(modelBCbest$residuals,
mu = 0,
conf.level = 0.95)
##
## One Sample t-test
##
## data: modelBCbest$residuals
## t = 4.1507e-17, df = 26, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.01073856 0.01073856
## sample estimates:
## mean of x
## 2.168404e-19
#Ragam Sisaan Homogen
ncvTest(modelBCbest)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.6653545, Df = 1, p = 0.41468
bptest(modelBCbest)
##
## studentized Breusch-Pagan test
##
## data: modelBCbest
## BP = 6.3248, df = 3, p-value = 0.09684
abs_res <- abs(modelBCbest$residuals)
glejser <- lm(abs_res~X1+X2+X3+X4+X5+X6+X7+X8, data = databc)
summary(modelBCbest)
##
## Call:
## lm(formula = Y ~ X5 + X6 + X8, data = databc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.042941 -0.015785 -0.003192 0.011683 0.055513
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.309642 0.309166 1.002 0.3270
## X5 0.080972 0.032072 2.525 0.0189 *
## X6 0.072598 0.007071 10.267 4.64e-10 ***
## X8 0.006281 0.002772 2.266 0.0332 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02886 on 23 degrees of freedom
## Multiple R-squared: 0.8698, Adjusted R-squared: 0.8528
## F-statistic: 51.22 on 3 and 23 DF, p-value: 2.435e-10
#Sisaan Normal
shapiro.test(modelBCbest$residuals)
##
## Shapiro-Wilk normality test
##
## data: modelBCbest$residuals
## W = 0.9597, p-value = 0.3638
#Sisaan saling bebas
runs.test(modelBCbest$residuals)
##
## Runs Test
##
## data: modelBCbest$residuals
## statistic = 1.201, runs = 17, n1 = 13, n2 = 13, n = 26, p-value =
## 0.2298
## alternative hypothesis: nonrandomness
dwtest(modelBCbest)
##
## Durbin-Watson test
##
## data: modelBCbest
## DW = 1.6868, p-value = 0.1617
## alternative hypothesis: true autocorrelation is greater than 0
library(rcompanion)
## Warning: package 'rcompanion' was built under R version 4.3.3
plotNormalHistogram(dt$Y, breaks = 10)
plotNormalHistogram(databc$Y, breaks = 10)
hist(dt$Y)
hist(databc$Y)