Tugas Akhir Analisis Regresi
Packages
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggridges)
library(treemap)#install.packages("treemap")
library(treemapify)#install.packages("treemapify")
library(GGally) ## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(dplyr)
library(openxlsx)#install.packages("openxlsx")
library(randtests)#install.packages("randtests")
library(lmtest)## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'olsrr'
##
## The following object is masked from 'package:datasets':
##
## rivers
## corrplot 0.92 loaded
Data
data <- read_excel("/Users/user/Downloads/Documents/Anreg /Tugas Akhir/Data-TA Anreg.xlsx")
Y<-data$Y
X0<-rep(1,40)
X1<-data$X1
X2<-data$X2
X3<-data$X3
X4<-data$X4
X5<-data$X5
X6<-data$X6
X7<-data$X7
X8<-data$X8
X9<-data$X9
X10<-data$X10
X11<-data$X11
X12<-data$X12
data<-data.frame(cbind(Y,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12))## Warning in cbind(Y, X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12):
## number of rows of result is not a multiple of vector length (arg 2)
## Y X0 X1 X2 X3 X4 X5 X6 X7 X8 X9
## 1 7.67 1 4.864729 9.242458 6.583333 9.757500 8.719583 6.134467 8.75 7.73 5.28
## 2 4.82 1 4.428046 8.748206 5.406250 4.874792 3.694398 4.056243 2.50 4.45 4.48
## 3 5.76 1 3.443199 8.469393 5.828750 6.746042 5.931667 5.539811 6.25 7.71 3.37
## 4 6.85 1 5.192925 8.575657 7.718333 9.492500 9.096296 8.299711 10.00 6.21 5.29
## 5 7.99 1 7.009211 9.272043 8.199583 8.575417 8.866713 7.327089 8.75 7.76 5.92
## 6 8.52 1 7.575891 9.765973 6.237917 9.859375 9.531042 9.024500 10.00 5.92 8.61
## X10 X11 X12
## 1 9.59 8.43 6.97
## 2 7.08 3.92 4.84
## 3 5.79 5.52 4.73
## 4 4.04 3.32 5.30
## 5 9.12 8.15 6.94
## 6 9.53 8.07 8.12
## 'data.frame': 165 obs. of 14 variables:
## $ Y : num 7.67 4.82 5.76 6.85 7.99 8.52 8.24 5.65 7.82 5.47 ...
## $ X0 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ X1 : num 4.86 4.43 3.44 5.19 7.01 ...
## $ X2 : num 9.24 8.75 8.47 8.58 9.27 ...
## $ X3 : num 6.58 5.41 5.83 7.72 8.2 ...
## $ X4 : num 9.76 4.87 6.75 9.49 8.58 ...
## $ X5 : num 8.72 3.69 5.93 9.1 8.87 ...
## $ X6 : num 6.13 4.06 5.54 8.3 7.33 ...
## $ X7 : num 8.75 2.5 6.25 10 8.75 ...
## $ X8 : num 7.73 4.45 7.71 6.21 7.76 5.92 4.95 4.6 8.52 7.08 ...
## $ X9 : num 5.28 4.48 3.37 5.29 5.92 8.61 8.55 5.27 5.63 6.29 ...
## $ X10: num 9.59 7.08 5.79 4.04 9.12 9.53 9.12 6.92 6.65 9.39 ...
## $ X11: num 8.43 3.92 5.52 3.32 8.15 8.07 8.55 7.18 5.07 8.19 ...
## $ X12: num 6.97 4.84 4.73 5.3 6.94 8.12 7.28 6.44 6.88 7.38 ...
Persamaan Awal Regresi
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10 + X11 + X12, data = data)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X5
## -0.08750 0.07578 0.08189 0.08667 0.08181 0.08752
## X6 X7 X8 X9 X10 X11
## 0.08617 0.08853 0.08442 0.09295 0.07832 0.08680
## X12
## 0.07913
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10 + X11 + X12, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.214480 -0.009055 0.000717 0.017401 0.153061
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.087496 0.028857 -3.032 0.00286 **
## X1 0.075784 0.004117 18.409 < 2e-16 ***
## X2 0.081889 0.002556 32.041 < 2e-16 ***
## X3 0.086673 0.003277 26.447 < 2e-16 ***
## X4 0.081809 0.003102 26.369 < 2e-16 ***
## X5 0.087522 0.003311 26.434 < 2e-16 ***
## X6 0.086171 0.003860 22.324 < 2e-16 ***
## X7 0.088528 0.001413 62.649 < 2e-16 ***
## X8 0.084421 0.003102 27.211 < 2e-16 ***
## X9 0.092951 0.004564 20.365 < 2e-16 ***
## X10 0.078322 0.002686 29.161 < 2e-16 ***
## X11 0.086795 0.003921 22.135 < 2e-16 ***
## X12 0.079135 0.004855 16.298 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0383 on 152 degrees of freedom
## Multiple R-squared: 0.9992, Adjusted R-squared: 0.9991
## F-statistic: 1.554e+04 on 12 and 152 DF, p-value: < 2.2e-16
Eksplorasi Data
Hubungan tiap peubah x terhadap peubah y
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X1,y = Y),color="coral",shape=8, size=1) +
geom_smooth(aes(x = X1, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Rule of Law") +
ylab("Human Freedom Index") +
xlab("Rule of Law") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X2,y = Y),color="chocolate",shape=8, size=1) +
geom_smooth(aes(x = X2, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Safety & Security") +
ylab("Human Freedom Index") +
xlab("Safety & Security") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X3,y = Y),color="darkgoldenrod3",shape=8, size=1) +
geom_smooth(aes(x = X3, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Movement") +
ylab("Human Freedom Index") +
xlab("Movement") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X4,y = Y),color="deepskyblue4",shape=8, size=1) +
geom_smooth(aes(x = X4, y =Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Religion") +
ylab("Human Freedom Index") +
xlab("Religion") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X5,y = Y),color="blueviolet",shape=8, size=1) +
geom_smooth(aes(x = X5, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Association, Assembly, & Civil Society") +
ylab("Human Freedom Index") +
xlab("Association, Assembly, & Civil Society") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X6,y = Y),color="chartreuse4",shape=8, size=1) +
geom_smooth(aes(x = X6, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Expression & Information") +
ylab("Human Freedom Index") +
xlab("Expression & Information") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X7,y = Y),color="#B2182B",shape=8, size=1) +
geom_smooth(aes(x = X7, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Identity & Relationships") +
ylab("Human Freedom Index") +
xlab("Identity & Relationships") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X8,y = Y),color="#D6604D",shape=8, size=1) +
geom_smooth(aes(x = X8, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Size of Govenrment") +
ylab("Human Freedom Index") +
xlab("Size of Govenrment") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X9,y = Y),color="#F4A582",shape=8, size=1) +
geom_smooth(aes(x = X9, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Legal System & Property Rights") +
ylab("Human Freedom Index") +
xlab("Legal System & Property Rights") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X10,y = Y),color="#99CC00",shape=8, size=1) +
geom_smooth(aes(x = X10, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Sound Money") +
ylab("Human Freedom Index") +
xlab("Sound Money") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X11,y = Y),color="#92C5DE",shape=8, size=1) +
geom_smooth(aes(x = X11, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Freedom to Trade Internationally") +
ylab("Human Freedom Index") +
xlab("Freedom to Trade Internationally") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
y.bar <- mean(Y)
interactive.plot <- ggplot(data) +
geom_point(aes(x = X12,y = Y),color="#FF00FF",shape=8, size=1) +
geom_smooth(aes(x = X12, y = Y), method = "lm", se = FALSE, color = "cornsilk3") +
ggtitle("Human Freedom Index vs Regulation") +
ylab("Human Freedom Index") +
xlab("Regulation") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5))
ggplotly(interactive.plot)## `geom_smooth()` using formula = 'y ~ x'
Matriks Korelasi
## Bar Chart
ggplot(data, aes(x = Y)) +
geom_histogram(binwidth = 0.05, color = "black", fill = "#69b3a2", alpha = 0.8,
size = 0.1, position = "identity", show.legend = FALSE) +
geom_vline(aes(xintercept = mean(Y)), color = "red", linetype = "dashed", size = 1) +
geom_density(alpha = 0.2, fill = "#FF9999") +
ggtitle("Sebaran Nilai Human Freedom Index") +
xlab("Human Freedom") + ylab("Frekuensi") +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Eksplorasi Kondisi Gauss-Markov
# Plot Sisaan vs Urutan: sisaan saling bebas
plot(x = 1:dim(data)[1],
y = model$residuals,
type = 'b',
ylab = "Residuals",
xlab = "Observation")Uji Formal Kondisi Gauss-Markov
# Asumsi Gauss-Markov: Nilai harapan sisaan sama dengan nol
t.test(model$residuals,
mu = 0,
conf.level = 0.95)##
## One Sample t-test
##
## data: model$residuals
## t = -3.0036e-16, df = 164, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.005667425 0.005667425
## sample estimates:
## mean of x
## -8.62105e-19
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 5.004655, Df = 1, p = 0.025279
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 22.471, df = 12, p-value = 0.03256
##
## Runs Test
##
## data: model$residuals
## statistic = 0, runs = 83, n1 = 82, n2 = 82, n = 164, p-value = 1
## alternative hypothesis: nonrandomness
##
## Durbin-Watson test
##
## data: model
## DW = 1.8497, p-value = 0.1645
## alternative hypothesis: true autocorrelation is greater than 0
##
## Shapiro-Wilk normality test
##
## data: model$residuals
## W = 0.80838, p-value = 1.942e-13
pada uji NCV, BP, dan Shapiro-wilk p-value < 0.05 sehingga tolak H0. diambil kesimpulan bahwa model tersebut sisaannya tidak menyebar normal dan ragamnya tidak homogen sehingga perlu analisis lebih lanjut
Pemeriksaan Multikolinieritas
## X1 X2 X3 X4 X5 X6 X7 X8
## 5.472347 2.125834 2.281046 5.035521 7.770521 9.585523 1.686990 1.358148
## X9 X10 X11 X12
## 6.337464 2.139229 3.125312 3.409483
Mulitkolinieritas terjadi saat nilai VIF > 10. pada data ini, nilai VIF untuk semua peubah < 10 maka dari itu multikolinieritas antar variabel tidak terdeteksi dan bisa dilanjutkan ke tahap analisis berikutnya
Pemeriksaan Amatan Berpengaruh
Tabel hii dan ri
s <- sqrt(anova(model)["Residuals", "Mean Sq"])
n = dim(data)[1]
p = length(model$coefficients)
hii=hatvalues(model)
Obs = c(1:n)
ei = model$residuals
ri = ei/(s*sqrt(1-hii))
Di = (ri^2/p)*(hii/(1-hii))
summ <- cbind.data.frame(Obs, data, hii, ri, Di)
head(summ)## Obs Y X0 X1 X2 X3 X4 X5 X6 X7 X8
## 1 1 7.67 1 4.864729 9.242458 6.583333 9.757500 8.719583 6.134467 8.75 7.73
## 2 2 4.82 1 4.428046 8.748206 5.406250 4.874792 3.694398 4.056243 2.50 4.45
## 3 3 5.76 1 3.443199 8.469393 5.828750 6.746042 5.931667 5.539811 6.25 7.71
## 4 4 6.85 1 5.192925 8.575657 7.718333 9.492500 9.096296 8.299711 10.00 6.21
## 5 5 7.99 1 7.009211 9.272043 8.199583 8.575417 8.866713 7.327089 8.75 7.76
## 6 6 8.52 1 7.575891 9.765973 6.237917 9.859375 9.531042 9.024500 10.00 5.92
## X9 X10 X11 X12 hii ri Di
## 1 5.28 9.59 8.43 6.97 0.06643009 0.514064230 1.446470e-03
## 2 4.48 7.08 3.92 4.84 0.09505868 0.661837447 3.539408e-03
## 3 3.37 5.79 5.52 4.73 0.05290021 0.404466725 7.028845e-04
## 4 5.29 4.04 3.32 5.30 0.14939956 -1.143692265 1.767253e-02
## 5 5.92 9.12 8.15 6.94 0.05670868 0.444242192 9.126409e-04
## 6 8.61 9.53 8.07 8.12 0.05197265 -0.005561875 1.304527e-07
Mendeteksi Titik Leverage
## 9 22 55 73 111 137 141 161 163
## 9 22 55 73 111 137 141 161 163
summ_leverage <- subset(summ, Obs %in% leverage)
summ_leverage <- subset(summ, Obs %in% leverage, select = c("Obs", "hii", "ri", "Di"))
summ_leverage## Obs hii ri Di
## 9 9 0.1729627 -0.52406597 4.418308e-03
## 22 22 0.1930682 0.52077661 4.991529e-03
## 55 55 0.1723072 -6.15585410 6.068307e-01
## 73 73 0.1825595 0.10224421 1.795901e-04
## 111 111 0.2565835 -5.70307933 8.635192e-01
## 137 137 0.1662502 1.22872287 2.315745e-02
## 141 141 0.2009714 -1.36469535 3.603297e-02
## 161 161 0.2470613 -0.05735877 8.304271e-05
## 163 163 0.2063192 -1.08208309 2.341378e-02
## Obs hii ri Di
## 111 111 0.2565835 -5.70307933 8.635192e-01
## 161 161 0.2470613 -0.05735877 8.304271e-05
## 163 163 0.2063192 -1.08208309 2.341378e-02
## 141 141 0.2009714 -1.36469535 3.603297e-02
## 22 22 0.1930682 0.52077661 4.991529e-03
## 73 73 0.1825595 0.10224421 1.795901e-04
## 9 9 0.1729627 -0.52406597 4.418308e-03
## 55 55 0.1723072 -6.15585410 6.068307e-01
## 137 137 0.1662502 1.22872287 2.315745e-02
Mendeteksi Pencilan
## [1] 52 53 55 79 83 111 112 125
summ_pencilan <- subset(summ, Obs %in% pencilan)
summ_pencilan <- subset(summ, Obs %in% pencilan, select = c("Obs", "hii", "ri", "Di"))
summ_pencilan## Obs hii ri Di
## 52 52 0.11822804 2.362934 0.05758693
## 53 53 0.07473013 4.154958 0.10725492
## 55 55 0.17230722 -6.155854 0.60683075
## 79 79 0.06640776 -2.005109 0.02199855
## 83 83 0.06420551 -2.994020 0.04731053
## 111 111 0.25658350 -5.703079 0.86351921
## 112 112 0.04642974 -2.150625 0.01732325
## 125 125 0.09288228 -2.157332 0.03665726
## Obs hii ri Di
## 111 111 0.25658350 -5.703079 0.86351921
## 55 55 0.17230722 -6.155854 0.60683075
## 52 52 0.11822804 2.362934 0.05758693
## 125 125 0.09288228 -2.157332 0.03665726
## 53 53 0.07473013 4.154958 0.10725492
## 79 79 0.06640776 -2.005109 0.02199855
## 83 83 0.06420551 -2.994020 0.04731053
## 112 112 0.04642974 -2.150625 0.01732325
Mendeteksi Amatan berpengaruh
Cook’s D
for (i in 1:dim(summ)[1]){
fcrit = qf(p=0.95, df1=p, df2=n-p)
amatan_berpengaruh <- which(Di > fcrit)
}
amatan_berpengaruh## named integer(0)
summ <- cbind.data.frame(Obs, data, hii, ri, Di)
summ_sorted <- summ %>%
arrange(desc(Di))
head(summ_sorted)## Obs Y X0 X1 X2 X3 X4 X5 X6 X7
## 111 111 5.48 1 2.248518 6.620798 4.351250 5.129583 2.241829 2.536015 10.000
## 55 55 6.50 1 6.838438 8.282947 6.131250 9.351875 6.464306 5.161967 8.750
## 53 53 8.70 1 8.588216 9.733592 8.252917 9.811875 9.158542 9.783417 10.000
## 52 52 7.16 1 5.956587 9.225635 7.720000 8.571875 6.909792 6.555375 9.375
## 83 83 6.25 1 5.835879 9.322234 7.530417 5.960208 3.931389 4.910233 4.375
## 125 125 5.39 1 4.633609 9.598463 4.460000 4.068958 2.204051 4.341233 2.500
## X8 X9 X10 X11 X12 hii ri Di
## 111 6.15 3.49 9.59 7.35 9.02 0.25658350 -5.703079 0.86351921
## 55 6.52 3.51 9.11 5.86 5.37 0.17230722 -6.155854 0.60683075
## 53 4.90 8.90 7.11 7.99 8.18 0.07473013 4.154958 0.10725492
## 52 6.76 4.73 6.32 5.39 7.74 0.11822804 2.362934 0.05758693
## 83 5.92 6.97 8.20 7.38 6.54 0.06420551 -2.994020 0.04731053
## 125 5.94 7.05 8.32 6.99 6.27 0.09288228 -2.157332 0.03665726
Menghapus Titik Leverage dan Pencilan
# Menggabungkan indeks titik leverage dan pencilan
observasi_normal <- setdiff(1:n, c(leverage, pencilan))
# Membuat data baru dengan menghapus titik leverage dan pencilan
data_clean <- data[observasi_normal,]
# Membuat model
model2 <- lm(Y ~ X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+X12 ,data= data_clean)
model2##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10 + X11 + X12, data = data_clean)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X5
## -0.09860 0.07966 0.08371 0.08428 0.08273 0.08343
## X6 X7 X8 X9 X10 X11
## 0.08756 0.08828 0.08332 0.08173 0.08322 0.08604
## X12
## 0.08612
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10 + X11 + X12, data = data_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.073581 -0.008555 0.003068 0.010314 0.039250
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0986022 0.0172808 -5.706 6.86e-08 ***
## X1 0.0796626 0.0025882 30.779 < 2e-16 ***
## X2 0.0837119 0.0015232 54.957 < 2e-16 ***
## X3 0.0842815 0.0019349 43.558 < 2e-16 ***
## X4 0.0827251 0.0017807 46.456 < 2e-16 ***
## X5 0.0834283 0.0019539 42.698 < 2e-16 ***
## X6 0.0875590 0.0022081 39.653 < 2e-16 ***
## X7 0.0882772 0.0008346 105.766 < 2e-16 ***
## X8 0.0833204 0.0018891 44.106 < 2e-16 ***
## X9 0.0817341 0.0029281 27.913 < 2e-16 ***
## X10 0.0832151 0.0018032 46.148 < 2e-16 ***
## X11 0.0860381 0.0024436 35.209 < 2e-16 ***
## X12 0.0861206 0.0033626 25.611 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02049 on 137 degrees of freedom
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9997
## F-statistic: 4.494e+04 on 12 and 137 DF, p-value: < 2.2e-16
Penduga Parameter dengan MKT Cara Manual
# Matriks data_clean (_cl)
Y_cl <- as.matrix(data_clean$Y)
X_cl <- as.matrix(cbind(data_clean$X0,data_clean$X1,data_clean$X2,data_clean$X3,
data_clean$X4,data_clean$X5,data_clean$X6,data_clean$X7,
data_clean$X8,data_clean$X9,data_clean$X10,data_clean$X11,
data_clean$X12))
b_cl <- solve(t(X_cl)%*%X_cl)%*%t(X_cl)%*%Y_cl;round(b_cl,5)## [,1]
## [1,] -0.09860
## [2,] 0.07966
## [3,] 0.08371
## [4,] 0.08428
## [5,] 0.08273
## [6,] 0.08343
## [7,] 0.08756
## [8,] 0.08828
## [9,] 0.08332
## [10,] 0.08173
## [11,] 0.08322
## [12,] 0.08604
## [13,] 0.08612
## [1] -0.0986
## [1] 0.07966
## [1] 0.08371
## [1] 0.08428
## [1] 0.08273
## [1] 0.08343
## [1] 0.08756
## [1] 0.08828
## [1] 0.08332
## [1] 0.08173
## [1] 0.08322
## [1] 0.08604
## [1] 0.08612
Persamaan Regresi Bergandanya adalah: \[y=b0+b1X1+b2X2+b3X3+b4X4+b5X5+b6X6+b7X7+b8X8+b9X9+b10X10+b11X11+b12X12\] \[y=-0.0986+0.07966X1+0.08371X2+0.08428X3+0.08273X4+0.08343X5+0.08756X6+0.08828X7+0.08332X8+0.08173X9+0.08322X10+ 0.08604X11+0.08612X12\]
Transformasi Box-Cox
##
## Attaching package: 'MASS'
## The following object is masked from 'package:olsrr':
##
## cement
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
bc_model <- boxcox(Y~X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+X12,data = data_clean,
lambda = seq(-2, 2, by = 0.1))## [1] 0.989899
Didapat nilai optimal dari lambda adalah 0.989899
# Mengubah nilai Y di data_clean dengan hasil transformasi
data_clean$Y <- (((data_clean$Y)^optimal_lambda)-1)/optimal_lambda
model3 <- lm(Y ~ X1+X2+X3+X4+X5+X6+X7+X8+X9+X10+X11+X12 ,data= data_clean)
model3##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10 + X11 + X12, data = data_clean)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X5
## -1.04659 0.07771 0.08230 0.08301 0.08135 0.08185
## X6 X7 X8 X9 X10 X11
## 0.08592 0.08663 0.08176 0.07973 0.08172 0.08456
## X12
## 0.08489
Eksplorasi Kondisi Gauss-Markov
Uji Formal Kondisi Gauss-Markov
# Asumsi Gauss-Markov: Nilai harapan sisaan sama dengan nol
t.test(model2$residuals,
mu = 0,
conf.level = 0.95)##
## One Sample t-test
##
## data: model2$residuals
## t = 8.6498e-17, df = 149, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003170338 0.003170338
## sample estimates:
## mean of x
## 1.387779e-19
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 20.96997, Df = 1, p = 4.6654e-06
##
## studentized Breusch-Pagan test
##
## data: model2
## BP = 47.678, df = 12, p-value = 3.556e-06
##
## Runs Test
##
## data: model2$residuals
## statistic = 0.16385, runs = 77, n1 = 75, n2 = 75, n = 150, p-value =
## 0.8698
## alternative hypothesis: nonrandomness
##
## Durbin-Watson test
##
## data: model2
## DW = 1.9671, p-value = 0.4154
## alternative hypothesis: true autocorrelation is greater than 0
##
## Shapiro-Wilk normality test
##
## data: model2$residuals
## W = 0.95077, p-value = 3.8e-05
Eksplorasi Kondisi Gauss-Markov
Uji Formal Kondisi Gauss-Markov
# Asumsi Gauss-Markov: Nilai harapan sisaan sama dengan nol
t.test(model3$residuals,
mu = 0,
conf.level = 0.95)##
## One Sample t-test
##
## data: model3$residuals
## t = -5.8828e-17, df = 149, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003107681 0.003107681
## sample estimates:
## mean of x
## -9.251859e-20
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 20.96997, Df = 1, p = 4.6654e-06
##
## studentized Breusch-Pagan test
##
## data: model3
## BP = 48.343, df = 12, p-value = 2.724e-06
##
## Runs Test
##
## data: model3$residuals
## statistic = 0.49155, runs = 79, n1 = 75, n2 = 75, n = 150, p-value =
## 0.623
## alternative hypothesis: nonrandomness
##
## Durbin-Watson test
##
## data: model3
## DW = 1.9702, p-value = 0.4227
## alternative hypothesis: true autocorrelation is greater than 0
##
## Shapiro-Wilk normality test
##
## data: model3$residuals
## W = 0.95361, p-value = 6.619e-05
Pemeriksaan Multikolinieritas
## X1 X2 X3 X4 X5 X6 X7 X8
## 6.598408 2.212545 2.209229 4.870321 8.166094 9.750065 1.671044 1.557160
## X9 X10 X11 X12
## 7.808219 2.236277 3.926715 4.185779
setelah dikalukan pendugaan parameter MKT dan transformasi Box-Cos, belum belum juga memenuhi asumsi Gauss-Markov. Hasil uji formal kondisi Gauss-Markov mengindikasikan sisaannya masih belum menyebar normal dan ragamnya tidak homogen. Oleh karena itu dilakukan bentuk analisis yang lain.
Ordinary Least Square (OLS)
Alternatif pembuatan model terbaik
Forward
## Start: AIC=-1159.9
## Y ~ X0 + X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 +
## X12
##
## Call:
## lm(formula = Y ~ X0 + X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 +
## X9 + X10 + X11 + X12, data = data_clean)
##
## Coefficients:
## (Intercept) X0 X1 X2 X3 X4
## -1.04659 NA 0.07771 0.08230 0.08301 0.08135
## X5 X6 X7 X8 X9 X10
## 0.08185 0.08592 0.08663 0.08176 0.07973 0.08172
## X11 X12
## 0.08456 0.08489
Backward
## Start: AIC=-1159.9
## Y ~ X0 + X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 +
## X12
##
##
## Step: AIC=-1159.9
## Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 +
## X12
##
## Df Sum of Sq RSS AIC
## <none> 0.0553 -1159.90
## - X12 1 0.2676 0.3229 -897.15
## - X9 1 0.3113 0.3666 -878.11
## - X1 1 0.3785 0.4338 -852.87
## - X11 1 0.5029 0.5582 -815.06
## - X6 1 0.6358 0.6911 -783.02
## - X5 1 0.7370 0.7922 -762.53
## - X3 1 0.7728 0.8281 -755.89
## - X8 1 0.7867 0.8420 -753.40
## - X10 1 0.8625 0.9178 -740.47
## - X4 1 0.8764 0.9317 -738.20
## - X2 1 1.2260 1.2812 -690.42
## - X7 1 4.5239 4.5792 -499.37
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10 + X11 + X12, data = data_clean)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X5
## -1.04659 0.07771 0.08230 0.08301 0.08135 0.08185
## X6 X7 X8 X9 X10 X11
## 0.08592 0.08663 0.08176 0.07973 0.08172 0.08456
## X12
## 0.08489
Stepwise
## Start: AIC=-1159.9
## Y ~ X0 + X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 +
## X12
##
##
## Step: AIC=-1159.9
## Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 +
## X12
##
## Df Sum of Sq RSS AIC
## <none> 0.0553 -1159.90
## - X12 1 0.2676 0.3229 -897.15
## - X9 1 0.3113 0.3666 -878.11
## - X1 1 0.3785 0.4338 -852.87
## - X11 1 0.5029 0.5582 -815.06
## - X6 1 0.6358 0.6911 -783.02
## - X5 1 0.7370 0.7922 -762.53
## - X3 1 0.7728 0.8281 -755.89
## - X8 1 0.7867 0.8420 -753.40
## - X10 1 0.8625 0.9178 -740.47
## - X4 1 0.8764 0.9317 -738.20
## - X2 1 1.2260 1.2812 -690.42
## - X7 1 4.5239 4.5792 -499.37
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10 + X11 + X12, data = data_clean)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X5
## -1.04659 0.07771 0.08230 0.08301 0.08135 0.08185
## X6 X7 X8 X9 X10 X11
## 0.08592 0.08663 0.08176 0.07973 0.08172 0.08456
## X12
## 0.08489
Best Subset Regression
## Best Subsets Regression
## -----------------------------------------------------
## Model Index Predictors
## -----------------------------------------------------
## 1 X6
## 2 X6 X11
## 3 X3 X6 X11
## 4 X3 X6 X7 X12
## 5 X3 X5 X7 X9 X11
## 6 X2 X4 X6 X7 X10 X12
## 7 X2 X3 X4 X6 X7 X10 X12
## 8 X2 X3 X4 X5 X7 X9 X10 X12
## 9 X2 X3 X4 X5 X7 X8 X9 X10 X11
## 10 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11
## 11 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11
## 12 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12
## -----------------------------------------------------
##
## Subsets Regression Summary
## -----------------------------------------------------------------------------------------------------------------------------------------
## Adj. Pred
## Model R-Square R-Square R-Square C(p) AIC SBIC SBC MSEP FPE HSP APC
## -----------------------------------------------------------------------------------------------------------------------------------------
## 1 0.7682 0.7666 0.7616 124902.2829 274.2451 -157.4269 283.2770 53.2226 0.3595 0.0024 0.2381
## 2 0.9110 0.9098 0.9062 47851.2114 132.6053 -301.0450 144.6478 20.5675 0.1399 9e-04 0.0926
## 3 0.9362 0.9348 0.9323 34294.0541 84.8057 -350.8236 99.8589 14.8587 0.1017 7e-04 0.0673
## 4 0.9579 0.9568 0.9548 22551.9673 24.2418 -413.3473 42.3056 9.8593 0.0679 5e-04 0.0450
## 5 0.9717 0.9707 0.9689 15140.6066 -33.0918 -472.6165 -12.0174 6.6847 0.0463 3e-04 0.0307
## 6 0.9829 0.9822 0.9813 9082.8757 -106.8719 -548.2611 -82.7868 4.0619 0.0283 2e-04 0.0188
## 7 0.9879 0.9873 0.9864 6414.0915 -156.1838 -599.4083 -129.0881 2.9056 0.0204 1e-04 0.0135
## 8 0.9904 0.9899 0.9892 5025.3628 -189.9961 -635.0395 -159.8897 2.3048 0.0163 1e-04 0.0108
## 9 0.9936 0.9932 0.9926 3333.3683 -247.7236 -694.3695 -214.6066 1.5589 0.0111 1e-04 0.0073
## 10 0.9964 0.9961 0.9958 1822.1671 -331.8726 -779.5662 -295.7450 0.8842 0.0063 0.0000 0.0042
## 11 0.9985 0.9984 0.9982 666.9319 -464.8626 -911.3189 -425.7243 0.3621 0.0026 0.0000 0.0017
## 12 0.9997 0.9997 0.9997 13.0000 -726.2260 -1149.4584 -684.0771 0.0630 5e-04 0.0000 3e-04
## -----------------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria
## SBIC: Sawa's Bayesian Information Criteria
## SBC: Schwarz Bayesian Criteria
## MSEP: Estimated error of prediction, assuming multivariate normality
## FPE: Final Prediction Error
## HSP: Hocking's Sp
## APC: Amemiya Prediction Criteria
Weighted Least Squares (WLS)
weights=1/lm(abs(model2$residuals) ~ model2$fitted.values)$fitted.values^2
model_wls <- lm(Y~X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 +X12 ,data = data_clean,weights = weights)
model_wls##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10 + X11 + X12, data = data_clean, weights = weights)
##
## Coefficients:
## (Intercept) X1 X2 X3 X4 X5
## -1.04390 0.07837 0.08182 0.08491 0.08120 0.08240
## X6 X7 X8 X9 X10 X11
## 0.08442 0.08556 0.08121 0.08069 0.08172 0.08554
## X12
## 0.08365
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10 + X11 + X12, data = data_clean, weights = weights)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -5.8417 -0.6370 0.1739 0.8529 2.4103
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.0439012 0.0175094 -59.62 <2e-16 ***
## X1 0.0783743 0.0021926 35.74 <2e-16 ***
## X2 0.0818195 0.0014443 56.65 <2e-16 ***
## X3 0.0849083 0.0015800 53.74 <2e-16 ***
## X4 0.0812010 0.0017857 45.47 <2e-16 ***
## X5 0.0824023 0.0018226 45.21 <2e-16 ***
## X6 0.0844194 0.0018167 46.47 <2e-16 ***
## X7 0.0855594 0.0008794 97.29 <2e-16 ***
## X8 0.0812113 0.0015014 54.09 <2e-16 ***
## X9 0.0806932 0.0024613 32.78 <2e-16 ***
## X10 0.0817198 0.0018250 44.78 <2e-16 ***
## X11 0.0855405 0.0022860 37.42 <2e-16 ***
## X12 0.0836520 0.0026880 31.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.331 on 137 degrees of freedom
## Multiple R-squared: 0.9998, Adjusted R-squared: 0.9998
## F-statistic: 5.395e+04 on 12 and 137 DF, p-value: < 2.2e-16
Uji Formal Kondisi Gauss-Markov (setelah WLS)
# Asumsi Gauss-Markov: Nilai harapan sisaan sama dengan nol
t.test(model_wls$residuals,
mu = 0,
conf.level = 0.95)##
## One Sample t-test
##
## data: model_wls$residuals
## t = -0.067452, df = 149, p-value = 0.9463
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003260183 0.003044954
## sample estimates:
## mean of x
## -0.0001076149
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 0.006805707, Df = 1, p = 0.93425
##
## Runs Test
##
## data: model_wls$residuals
## statistic = 0.16385, runs = 77, n1 = 75, n2 = 75, n = 150, p-value =
## 0.8698
## alternative hypothesis: nonrandomness
# Asumsi Normalitas Sisaan
sisaan_model_wls <- resid(model_wls)
(norm_model_wls <- lillie.test(sisaan_model_wls))##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: sisaan_model_wls
## D = 0.10588, p-value = 0.0003014
## [1] "Sisaan Tidak Menyebar Normal"
## X1 X2 X3 X4 X5 X6 X7 X8
## 8.897954 2.380737 1.780773 4.024907 6.945232 8.210702 1.812280 1.543035
## X9 X10 X11 X12
## 10.494779 2.413666 4.008855 4.207437