Inisilisasi Library

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.3.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.3.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(randtests)
library(ggcorrplot)
## Warning: package 'ggcorrplot' was built under R version 4.3.3
library(GGally)
## Warning: package 'GGally' was built under R version 4.3.2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(olsrr)
## Warning: package 'olsrr' was built under R version 4.3.2
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
library(nortest)
library(MASS)
## Warning: package 'MASS' was built under R version 4.3.3
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:olsrr':
## 
##     cement
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: lattice
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ purrr::lift()    masks caret::lift()
## ✖ car::recode()    masks dplyr::recode()
## ✖ MASS::select()   masks plotly::select(), dplyr::select()
## ✖ purrr::some()    masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)

Import Data

library(readxl)
data_pdrb <- read_xlsx("C:/Users/Alista/Documents/Semester 4/ANREG/Data ANREG Kelompok 7 UAS.xlsx")
## New names:
## • `` -> `...17`
## • `` -> `...18`
Y <- (data_pdrb$`PDRB Awal`)/1000
X0 <- rep(1,27)
X1 <- (data_pdrb$`Balita Stunting`)/1000
X2 <- data_pdrb$`Akses Sanitasi Layak`
X3 <- data_pdrb$`Usia Harapan Hidup`
X4 <- data_pdrb$`Rata-rata Lama Sekolah`
X5 <- (data_pdrb$`Pengeluaran per Kapita`)/1000
X6 <- (data_pdrb$`Jumlah Angkatan Kerja`)/1000
X7 <- data_pdrb$`Partisipasi Angkatan Kerja`
X8 <- data_pdrb$`Tingkat Pengangguran Terbuka`

dt<-data.frame(Y,X1,X2,X3,X4,X5,X6,X7,X8)
dt
##            Y     X1    X2    X3    X4     X5       X6    X7    X8
## 1  167.96618 18.666 73.47 71.65  8.34 10.860 2897.332 63.75 10.64
## 2   50.38872 10.231 63.32 71.54  7.11  9.210 1313.905 69.11  7.77
## 3   34.55640  6.871 61.76 70.58  7.20  8.244 1222.589 69.98  8.41
## 4   88.43796 21.018 70.85 74.01  9.08 10.588 1808.799 63.64  6.98
## 5   42.01296 30.266 51.90 71.85  7.83  8.227 1330.353 68.84  7.60
## 6   26.36166 14.122 54.39 69.95  7.73  8.177  940.713 67.83  4.17
## 7   23.91889  1.872 75.50 72.30  8.00  9.428  664.523 68.47  3.75
## 8   18.45034  4.798 69.29 74.03  7.88  9.620  530.825 61.80  9.81
## 9   35.52378 10.635 86.76 72.47  7.40 10.791 1110.529 65.53  8.11
## 10  24.30077  3.063 81.26 70.76  7.49  9.950  644.128 66.21  4.16
## 11  25.64185  6.316 80.59 72.91  8.72 10.776  609.471 64.63  7.72
## 12  61.25961  3.797 86.64 72.15  6.83 10.166  952.841 69.08  6.49
## 13  30.18158  1.188 80.02 72.92  7.20 11.294  868.132 68.87  7.77
## 14  49.29337  1.438 82.46 71.47  8.11 12.193  472.075 65.21  8.75
## 15 177.47089  2.768 85.10 72.62  7.96 11.927 1195.947 65.51  9.87
## 16 265.13082  3.899 89.99 74.04  9.53 11.757 2006.507 65.41 10.31
## 17  33.39329  9.304 50.96 72.79  8.22  9.044  819.559 64.37  9.63
## 18   8.42640  0.519 83.00 71.89  8.03  9.389  260.761 79.92  1.56
## 19  35.25887  1.743 67.93 74.13 10.63 12.058  556.541 64.21 10.78
## 20   9.32416  0.806 45.80 72.85 10.14 11.229  159.618 62.48  8.83
## 21 211.24937  6.614 49.85 74.75 11.00 17.639 1435.635 69.42  9.55
## 22  18.03025  2.381 90.62 72.74 10.33 12.087  163.639 65.42  8.42
## 23  73.26065  4.575 90.80 75.48 11.44 16.239 1592.545 65.33  8.81
## 24  52.56498  3.637 96.21 74.92 11.47 15.926 1258.739 63.35  7.82
## 25  24.65273  3.036 77.18 74.50 11.21 12.500  320.574 67.22 10.77
## 26  16.78104  5.633 52.62 72.63  9.53 10.578  347.063 65.99  6.62
## 27   3.50625  0.846 88.26 71.49  8.78 10.967   94.831 63.76  5.53

Eksplorasi Peubah X terhadap Peubah Y

library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
#Plot Y vs X1
y.bar <- mean(Y)
interactive.plot1 <- ggplot(dt) +
  geom_point(aes(x = X1,y = Y),color="coral",shape=8, size=1) +
  geom_smooth(aes(x = X1, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Y vs X1") +
  ylab("Y") +
  xlab("X1") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot1)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X2
y.bar <- mean(Y)
interactive.plot2 <- ggplot(dt) +
  geom_point(aes(x = X2,y = Y),color="coral",shape=8, size=1) +
  geom_smooth(aes(x = X2, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Y vs X2") +
  ylab("Y") +
  xlab("X2") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot2)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X3
y.bar <- mean(Y)
interactive.plot3 <- ggplot(dt) +
  geom_point(aes(x = X3,y = Y),color="coral",shape=8, size=1) +
  geom_smooth(aes(x = X3, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Y") +
  ylab("Y") +
  xlab("X3") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot3)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X4
y.bar <- mean(Y)
interactive.plot4 <- ggplot(dt) +
  geom_point(aes(x = X4,y = Y),color="coral",shape=8, size=1) +
  geom_smooth(aes(x = X4, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Y vs X4") +
  ylab("Y") +
  xlab("X4") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot4)
## `geom_smooth()` using formula = 'y ~ x'
# Menggabungkan scatter plot ke dalam satu layout
grid.arrange(interactive.plot1, interactive.plot2, interactive.plot3, interactive.plot4, nrow = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#Plot Y vs X5
y.bar <- mean(Y)
interactive.plot5 <- ggplot(dt) +
  geom_point(aes(x = X5,y = Y),color="coral",shape=8, size=1) +
  geom_smooth(aes(x = X5, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Y vs X5") +
  ylab("Y") +
  xlab("X5") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot5)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X6
y.bar <- mean(Y)
interactive.plot6 <- ggplot(dt) +
  geom_point(aes(x = X6,y = Y),color="coral",shape=8, size=1) +
  geom_smooth(aes(x = X6, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Y vs X6") +
  ylab("Y") +
  xlab("X6") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot6)
## `geom_smooth()` using formula = 'y ~ x'
##Plot Y vs X7
y.bar <- mean(Y)
interactive.plot7 <- ggplot(dt) +
  geom_point(aes(x = X7,y = Y),color="coral",shape=8, size=1) +
  geom_smooth(aes(x = X7, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Y vs X7") +
  ylab("Y") +
  xlab("X7") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot7)
## `geom_smooth()` using formula = 'y ~ x'
#Plot Y vs X8
y.bar <- mean(Y)
interactive.plot8 <- ggplot(dt) +
  geom_point(aes(x = X8,y = Y),color="coral",shape=8, size=1) +
  geom_smooth(aes(x = X8, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
  ggtitle("Y vs X8") +
  ylab("Y") +
  xlab("X8") + 
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot8)
## `geom_smooth()` using formula = 'y ~ x'
# Menggabungkan scatter plot ke dalam satu layout
grid.arrange(interactive.plot5, interactive.plot6, interactive.plot7, interactive.plot8, nrow = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# Matriks Korelasi ## Heatmap

data_corrl <- round(cor(dt), 1)

data_corrl <- cor(data_corrl)
ggcorrplot(data_corrl)

library(ggcorrplot)

reduced_data <- dt[, !names(dt) %in% c("Y")]

# Compute correlation at 2 decimal places
corr_matrix = round(cor(reduced_data), 2)

# Compute and show the  result
ggcorrplot(corr_matrix, hc.order = TRUE, type = "lower",
          lab = TRUE)

# Compute correlation at 2 decimal places
corr_matrixY = round(cor(dt), 2)

# Compute and show the  result
ggcorrplot(corr_matrixY, hc.order = TRUE, type = "lower",
          lab = TRUE)

## Menggunakan chart.Correlation()

library(PerformanceAnalytics)
## Warning: package 'PerformanceAnalytics' was built under R version 4.3.3
## Loading required package: xts
## Warning: package 'xts' was built under R version 4.3.2
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
chart.Correlation(dt,
                  histogram = TRUE, # tambahkan histogram pada diagonal
                  pch = 1) # jenis point = 1
## Warning in par(usr): argument 1 does not name a graphical parameter
## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

Pemodelan Awal

model1 = lm(formula = Y ~., data = dt)
summary(model1)
## 
## Call:
## lm(formula = Y ~ ., data = dt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.841 -20.235   2.806  12.658 101.894 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -71.90966  745.09737  -0.097 0.924182    
## X1           -2.98854    2.00210  -1.493 0.152836    
## X2           -0.41374    0.72270  -0.572 0.574066    
## X3           -2.29776   11.02580  -0.208 0.837258    
## X4           -1.33650   11.86676  -0.113 0.911574    
## X5            6.13531    7.60345   0.807 0.430251    
## X6            0.08510    0.02153   3.952 0.000935 ***
## X7            2.48000    3.08720   0.803 0.432272    
## X8            5.99254    5.54857   1.080 0.294395    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 43.95 on 18 degrees of freedom
## Multiple R-squared:  0.6986, Adjusted R-squared:  0.5647 
## F-statistic: 5.215 on 8 and 18 DF,  p-value: 0.001755

Uji Asumsi Klasik

Uji Non-formal

# Residual vs Y Duga
plot(model1,1)

# Residual vs Urutan
plot(x = 1:dim(dt)[1] ,
     y = model1$residuals,
     type ='b',
     ylab ="Residuals",
     xlab ="Observations")

# Normalitas QQ Plot
plot(model1,2)

## Formal

#NH Sisaan = 0
t.test(model1$residuals,
       mu = 0,
       conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  model1$residuals
## t = 5.8559e-17, df = 26, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -14.46519  14.46519
## sample estimates:
##    mean of x 
## 4.120932e-16
#Ragam Sisaan Homogen
ncvTest(model1)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 17.95736, Df = 1, p = 2.2591e-05
bptest(model1)
## 
##  studentized Breusch-Pagan test
## 
## data:  model1
## BP = 14.977, df = 8, p-value = 0.0596
abs_res <- abs(model1$residuals)
glejser <- lm(abs_res~X1+X2+X3+X4+X5+X6+X7+X8, data = dt)
summary(glejser)
## 
## Call:
## lm(formula = abs_res ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, 
##     data = dt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.084  -9.235  -1.629   8.107  32.047 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -30.791997 325.757732  -0.095  0.92574   
## X1           -1.453469   0.875322  -1.660  0.11413   
## X2            0.150827   0.315964   0.477  0.63885   
## X3           -1.779648   4.820498  -0.369  0.71630   
## X4            2.724337   5.188168   0.525  0.60592   
## X5            0.398705   3.324238   0.120  0.90586   
## X6            0.029487   0.009415   3.132  0.00576 **
## X7            1.548663   1.349729   1.147  0.26624   
## X8            3.237939   2.425845   1.335  0.19859   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.21 on 18 degrees of freedom
## Multiple R-squared:  0.6061, Adjusted R-squared:  0.431 
## F-statistic: 3.462 on 8 and 18 DF,  p-value: 0.01364
#Sisaan saling bebas
runs.test(model1$residuals)
## 
##  Runs Test
## 
## data:  model1$residuals
## statistic = -0.80064, runs = 12, n1 = 13, n2 = 13, n = 26, p-value =
## 0.4233
## alternative hypothesis: nonrandomness
dwtest(model1)
## 
##  Durbin-Watson test
## 
## data:  model1
## DW = 1.2481, p-value = 0.00651
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(model1)
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  model1
## LM test = 3.8059, df = 1, p-value = 0.05107
#Sisaan Normal
shapiro.test(model1$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  model1$residuals
## W = 0.9389, p-value = 0.1144
ad.test(model1$residuals)
## 
##  Anderson-Darling normality test
## 
## data:  model1$residuals
## A = 0.62209, p-value = 0.09444
lillie.test(model1$residuals)
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  model1$residuals
## D = 0.14608, p-value = 0.1465

Multikol

vif(model1)
##       X1       X2       X3       X4       X5       X6       X7       X8 
## 2.702995 1.574271 3.190659 3.995917 4.259429 2.619660 1.597939 2.253778

tidak ada multikolinearitas

Pendeteksian Titik Tidak Umum

ols_plot_resid_lev(model1)

index <- c(1:27)
ri <- studres(model1)
ci <- cooks.distance(model1)
DFFITSi <- dffits(model1)
hi <- ols_hadi(model1)
hii <- hatvalues(model1)
hasil <- data.frame(index,ri,ci,DFFITSi,hi,hii); round(hasil,4)
##    index      ri     ci DFFITSi   hadi potential residual    hii
## 1      1 -1.1178 0.1583 -1.2019 1.7925    1.1561   0.6364 0.5362
## 2      2 -0.8078 0.0116 -0.3204 0.5010    0.1573   0.3437 0.1359
## 3      3 -1.5423 0.1108 -1.0361 1.6581    0.4514   1.2068 0.3110
## 4      4  0.4687 0.0181  0.3944 0.8237    0.7080   0.1157 0.4145
## 5      5  0.8080 0.1328  1.0828 2.1331    1.7958   0.3373 0.6423
## 6      6  0.2883 0.0049  0.2047 0.5480    0.5041   0.0439 0.3351
## 7      7  0.0728 0.0002  0.0448 0.3807    0.3779   0.0028 0.2742
## 8      8 -0.0646 0.0003 -0.0537 0.6927    0.6904   0.0022 0.4084
## 9      9 -0.5088 0.0079 -0.2616 0.4010    0.2644   0.1366 0.2091
## 10    10  0.1681 0.0011  0.0963 0.3429    0.3280   0.0150 0.2470
## 11    11  0.1027 0.0001  0.0323 0.1046    0.0990   0.0056 0.0901
## 12    12  0.0393 0.0000  0.0187 0.2277    0.2268   0.0008 0.1849
## 13    13 -1.1816 0.0495 -0.6745 1.0503    0.3258   0.7245 0.2458
## 14    14  0.1483 0.0012  0.1009 0.4748    0.4632   0.0116 0.3166
## 15    15  1.9938 0.0795  0.9132 2.2323    0.2098   2.0225 0.1734
## 16    16  4.1361 0.6816  3.4094 7.1158    0.6795   6.4363 0.4046
## 17    17 -0.2527 0.0020 -0.1297 0.2971    0.2633   0.0338 0.2084
## 18    18  0.1932 0.0101  0.2934 2.3255    2.3058   0.0197 0.6975
## 19    19 -0.5056 0.0079 -0.2614 0.4022    0.2673   0.1349 0.2109
## 20    20 -0.1907 0.0026 -0.1481 0.6218    0.6026   0.0192 0.3760
## 21    21  2.7109 1.9076  4.8192 6.0891    3.1603   2.9288 0.7596
## 22    22  0.3990 0.0091  0.2795 0.5746    0.4906   0.0840 0.3291
## 23    23 -1.8703 0.1403 -1.1991 2.1582    0.4111   1.7471 0.2913
## 24    24 -1.3005 0.0860 -0.8966 1.3429    0.4753   0.8676 0.3222
## 25    25 -0.3292 0.0085 -0.2692 0.7262    0.6690   0.0572 0.4008
## 26    26  0.2254 0.0014  0.1092 0.2617    0.2349   0.0269 0.1902
## 27    27  0.6307 0.0182  0.3979 0.6073    0.3981   0.2092 0.2847
for (i in 1:dim(hasil)[1]){
  absri <- abs(hasil$ri)
  pencilan <- which(absri > 2)
}
pencilan
## [1] 16 21
titik_leverage <- vector("list", dim(hasil)[1])
for (i in 1:dim(hasil)[1]) {
  cutoff <- 2 * 9 / 27
  titik_leverage[[i]] <- which(hii > cutoff)
}
leverages <- unlist(titik_leverage)
titik_leverage <- sort(unique(leverages))
titik_leverage
## [1] 18 21
amatan_berpengaruh <- vector("list", dim(hasil)[1])
for (i in 1:dim(hasil)[1]) {
  cutoff <- 2 * sqrt((9 / 27))
  amatan_berpengaruh[[i]] <- which(abs(DFFITSi) > cutoff)
}
berpengaruh <- unlist(amatan_berpengaruh)
amatan_berpengaruh <- sort(unique(berpengaruh))
amatan_berpengaruh
## [1]  1 16 21 23
data1 <- as.data.frame(dt[-c(18),])

model2 <- lm(Y~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data=data1)
summary(model2)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, data = data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.213 -20.843   3.822  14.534 102.716 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   1.99785  856.09164   0.002  0.99817   
## X1           -2.94117    2.07244  -1.419  0.17392   
## X2           -0.44319    0.75831  -0.584  0.56660   
## X3           -2.74495   11.56700  -0.237  0.81525   
## X4           -1.98067   12.64496  -0.157  0.87738   
## X5            6.65089    8.25841   0.805  0.43174   
## X6            0.08496    0.02215   3.836  0.00132 **
## X7            1.84924    4.55295   0.406  0.68969   
## X8            6.23157    5.83584   1.068  0.30054   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 45.17 on 17 degrees of freedom
## Multiple R-squared:  0.692,  Adjusted R-squared:  0.5471 
## F-statistic: 4.775 on 8 and 17 DF,  p-value: 0.003281

Karena Adj-R di Model 1 lebih besar dibanding model 2, kita pakai model 1, yaitu tidak menghilangkan pencilan, leverage, dan amatan berpengaruh

Metode WLS

Untuk ragam tidak homogen

wls=1/lm(abs(model1$residuals) ~ model1$fitted.values)$fitted.values^2
model_wls <- lm(Y~., data = dt,weights = wls)
summary(model_wls)
## 
## Call:
## lm(formula = Y ~ ., data = dt, weights = wls)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.20192 -0.41816  0.02258  0.17761  2.29624 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  93.92699  274.15161   0.343 0.735860    
## X1           -0.97106    0.84815  -1.145 0.267236    
## X2           -0.06457    0.25439  -0.254 0.802523    
## X3           -3.49212    4.41201  -0.792 0.438957    
## X4           -1.01054    6.18365  -0.163 0.872008    
## X5            7.90444    5.73419   1.378 0.184950    
## X6            0.05972    0.01340   4.458 0.000304 ***
## X7            1.06767    0.82146   1.300 0.210097    
## X8            2.54292    2.46956   1.030 0.316786    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.01 on 18 degrees of freedom
## Multiple R-squared:  0.8279, Adjusted R-squared:  0.7514 
## F-statistic: 10.82 on 8 and 18 DF,  p-value: 1.762e-05

Uji Asumsi Klasik WLS

# Residual vs Y Duga
plot(model_wls,1)

# Residual vs Urutan
plot(x = 1:27,
     y = model_wls$residuals,
     type ='b',
     ylab ="Residuals",
     xlab ="Observations")

# Normalitas QQ Plot
plot(model_wls,2)

#NH Sisaan = 0
t.test(model_wls$residuals,
       mu = 0,
       conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  model_wls$residuals
## t = 0.54294, df = 26, p-value = 0.5918
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -11.59390  19.91711
## sample estimates:
## mean of x 
##  4.161602
#Ragam Sisaan Homogen
ncvTest(model_wls)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 13.19452, Df = 1, p = 0.00028077
bptest(model_wls)
## 
##  studentized Breusch-Pagan test
## 
## data:  model_wls
## BP = 0.0024473, df = 8, p-value = 1
abs_res <- abs(model_wls$residuals)
glejser <- lm(abs_res~X1+X2+X3+X4+X5+X6+X7+X8, data = dt, weights = wls)
summary(glejser)
## 
## Call:
## lm(formula = abs_res ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8, 
##     data = dt, weights = wls)
## 
## Weighted Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80290 -0.31364  0.00205  0.16100  1.59426 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  37.983241 172.326302   0.220   0.8280  
## X1           -0.701495   0.533128  -1.316   0.2048  
## X2           -0.126456   0.159905  -0.791   0.4393  
## X3           -1.315696   2.773305  -0.474   0.6409  
## X4           -2.689407   3.886922  -0.692   0.4978  
## X5            6.210792   3.604400   1.723   0.1020  
## X6            0.022872   0.008421   2.716   0.0142 *
## X7            0.302536   0.516356   0.586   0.5652  
## X8            0.322247   1.552316   0.208   0.8379  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.635 on 18 degrees of freedom
## Multiple R-squared:  0.5915, Adjusted R-squared:   0.41 
## F-statistic: 3.258 on 8 and 18 DF,  p-value: 0.0178
#Sisaan Normal
shapiro.test(model_wls$residuals) 
## 
##  Shapiro-Wilk normality test
## 
## data:  model_wls$residuals
## W = 0.72863, p-value = 9.96e-06
#Sisaan saling bebas
runs.test(model_wls$residuals)
## 
##  Runs Test
## 
## data:  model_wls$residuals
## statistic = 0, runs = 14, n1 = 13, n2 = 13, n = 26, p-value = 1
## alternative hypothesis: nonrandomness

Transformasi Boxcox

Boxcox 1

library(MASS)
databc <- dt
bc <- boxcox(Y~., data = databc)

lambda <- bc$x[which.max(bc$y)]
lambda
## [1] 0.1010101
databc$Y <- ((databc$Y^lambda-1)/lambda)

modelBoxcox <- lm(Y~., data = databc)
summary(modelBoxcox)
## 
## Call:
## lm(formula = Y ~ ., data = databc)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1051 -0.5105  0.1200  0.4684  1.2882 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -6.0503924 12.4813394  -0.485 0.633697    
## X1          -0.0163050  0.0335378  -0.486 0.632715    
## X2          -0.0049119  0.0121061  -0.406 0.689718    
## X3           0.0612487  0.1846964   0.332 0.744007    
## X4          -0.2638681  0.1987836  -1.327 0.200963    
## X5           0.2179536  0.1273675   1.711 0.104218    
## X6           0.0016057  0.0003607   4.451 0.000308 ***
## X7           0.0533624  0.0517146   1.032 0.315802    
## X8           0.1688033  0.0929458   1.816 0.086041 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7362 on 18 degrees of freedom
## Multiple R-squared:  0.8194, Adjusted R-squared:  0.7392 
## F-statistic: 10.21 on 8 and 18 DF,  p-value: 2.639e-05

Pemodelan Terbaik

step(modelBoxcox)
## Start:  AIC=-9.49
## Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8
## 
##        Df Sum of Sq     RSS      AIC
## - X3    1    0.0596  9.8148 -11.3226
## - X2    1    0.0892  9.8444 -11.2413
## - X1    1    0.1281  9.8833 -11.1349
## - X7    1    0.5770 10.3322  -9.9354
## <none>               9.7552  -9.4871
## - X4    1    0.9549 10.7101  -8.9655
## - X5    1    1.5870 11.3421  -7.4174
## - X8    1    1.7876 11.5427  -6.9441
## - X6    1   10.7384 20.4935   8.5554
## 
## Step:  AIC=-11.32
## Y ~ X1 + X2 + X4 + X5 + X6 + X7 + X8
## 
##        Df Sum of Sq     RSS      AIC
## - X2    1    0.0679  9.8826 -13.1366
## - X1    1    0.1186  9.9334 -12.9982
## - X7    1    0.6075 10.4223 -11.7010
## <none>               9.8148 -11.3226
## - X4    1    0.9282 10.7429 -10.8829
## - X5    1    1.8316 11.6464  -8.7028
## - X8    1    2.2186 12.0334  -7.8201
## - X6    1   10.7205 20.5353   6.6103
## 
## Step:  AIC=-13.14
## Y ~ X1 + X4 + X5 + X6 + X7 + X8
## 
##        Df Sum of Sq     RSS      AIC
## - X1    1    0.0672  9.9498 -14.9535
## - X7    1    0.7144 10.5970 -13.2522
## <none>               9.8826 -13.1366
## - X4    1    0.8755 10.7581 -12.8447
## - X5    1    1.7637 11.6464 -10.7028
## - X8    1    2.6526 12.5352  -8.7170
## - X6    1   11.1750 21.0577   5.2886
## 
## Step:  AIC=-14.95
## Y ~ X4 + X5 + X6 + X7 + X8
## 
##        Df Sum of Sq     RSS      AIC
## - X7    1    0.7496 10.6995 -14.9923
## <none>               9.9498 -14.9535
## - X4    1    1.1290 11.0789 -14.0515
## - X8    1    2.7505 12.7004 -10.3636
## - X5    1    2.9501 12.9000  -9.9425
## - X6    1   19.1250 29.0748  11.9990
## 
## Step:  AIC=-14.99
## Y ~ X4 + X5 + X6 + X8
## 
##        Df Sum of Sq    RSS      AIC
## <none>              10.700 -14.9923
## - X4    1    1.4532 12.153 -13.5537
## - X8    1    2.0013 12.701 -12.3627
## - X5    1    3.3496 14.049  -9.6386
## - X6    1   19.6101 30.310  11.1219
## 
## Call:
## lm(formula = Y ~ X4 + X5 + X6 + X8, data = databc)
## 
## Coefficients:
## (Intercept)           X4           X5           X6           X8  
##      1.4977      -0.2760       0.2558       0.0015       0.1392
modelBCbest <- lm(formula = Y ~ X4 + X5 + X6 + X8, data = databc)
summary(modelBCbest)
## 
## Call:
## lm(formula = Y ~ X4 + X5 + X6 + X8, data = databc)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4545 -0.5231  0.0980  0.3671  1.2845 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.4977328  0.8749287   1.712   0.1010    
## X4          -0.2759667  0.1596470  -1.729   0.0979 .  
## X5           0.2557720  0.0974604   2.624   0.0155 *  
## X6           0.0014996  0.0002362   6.350 2.17e-06 ***
## X8           0.1392436  0.0686416   2.029   0.0548 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6974 on 22 degrees of freedom
## Multiple R-squared:  0.802,  Adjusted R-squared:  0.766 
## F-statistic: 22.27 on 4 and 22 DF,  p-value: 1.805e-07

Uji Asumsi Klasik Model Best Boxcox

# Residual vs Y Duga
plot(modelBCbest,1)

# Residual vs Urutan
plot(x = 1:27,
     y = modelBCbest$residuals,
     type ='b',
     ylab ="Residuals",
     xlab ="Observations")

# Normalitas QQ Plot
plot(modelBCbest,2)

#NH Sisaan = 0
t.test(modelBCbest$residuals,
       mu = 0,
       conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  modelBCbest$residuals
## t = 5.8482e-17, df = 26, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.2537677  0.2537677
## sample estimates:
##    mean of x 
## 7.219983e-18
#Ragam Sisaan Homogen
ncvTest(modelBCbest)
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 2.8009, Df = 1, p = 0.094211
bptest(modelBCbest)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelBCbest
## BP = 3.7368, df = 4, p-value = 0.4428
abs_res <- abs(modelBCbest$residuals)
glejser <- lm(abs_res~X1+X2+X3+X4+X5+X6+X7+X8, data = databc)
summary(modelBCbest)
## 
## Call:
## lm(formula = Y ~ X4 + X5 + X6 + X8, data = databc)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4545 -0.5231  0.0980  0.3671  1.2845 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.4977328  0.8749287   1.712   0.1010    
## X4          -0.2759667  0.1596470  -1.729   0.0979 .  
## X5           0.2557720  0.0974604   2.624   0.0155 *  
## X6           0.0014996  0.0002362   6.350 2.17e-06 ***
## X8           0.1392436  0.0686416   2.029   0.0548 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6974 on 22 degrees of freedom
## Multiple R-squared:  0.802,  Adjusted R-squared:  0.766 
## F-statistic: 22.27 on 4 and 22 DF,  p-value: 1.805e-07
#Sisaan Normal
shapiro.test(modelBCbest$residuals) 
## 
##  Shapiro-Wilk normality test
## 
## data:  modelBCbest$residuals
## W = 0.97099, p-value = 0.6281
#Sisaan saling bebas
runs.test(modelBCbest$residuals)
## 
##  Runs Test
## 
## data:  modelBCbest$residuals
## statistic = 1.201, runs = 17, n1 = 13, n2 = 13, n = 26, p-value =
## 0.2298
## alternative hypothesis: nonrandomness
dwtest(modelBCbest)
## 
##  Durbin-Watson test
## 
## data:  modelBCbest
## DW = 1.4581, p-value = 0.037
## alternative hypothesis: true autocorrelation is greater than 0
library(rcompanion)
## Warning: package 'rcompanion' was built under R version 4.3.3
plotNormalHistogram(dt$Y, breaks = 10)

plotNormalHistogram(databc$Y, breaks = 10)

hist(dt$Y)

hist(databc$Y)