rm(list = ls())
library(foreign)
library("gplots")
library(ggplot2)
library("dplyr")
library(tidyverse)
library(gganimate)
library(tseries)
library(plm)
library(lmtest)
library(sandwich)
library(lmtest)
#coint test
library(pco)
library(memisc)
options(repr.matrix.max.cols=50, repr.matrix.max.rows=100)
df <- data.frame(as.data.set(spss.system.file("/Users/macbookairm1/Documents/BUKU R/KS2.sav")))
File character set is 'UTF-8'.
Converting character set to UTF-8.
head(df)
A tibble: 6 × 31
|
X1.1
|
X1.2
|
X1.3
|
X1.4
|
X1.5
|
X1.6
|
X2.1
|
X2.2
|
X2.3
|
X2.4
|
X2.5
|
X2.6
|
X3.1
|
X3.2
|
X3.3
|
X3.4
|
X3.5
|
X3.6
|
X3.7
|
Y.1
|
Y.2
|
Y.3
|
Y.4
|
Y.5
|
Y.6
|
Y.7
|
X1
|
X2
|
X3
|
Y
|
resid
|
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
|
5
|
5
|
4
|
3
|
5
|
3
|
5
|
5
|
5
|
5
|
4
|
4
|
4
|
5
|
4
|
5
|
4
|
3
|
5
|
5
|
5
|
5
|
5
|
4
|
4
|
5
|
25
|
28
|
30
|
33
|
0.8668151
|
|
5
|
5
|
5
|
5
|
5
|
2
|
5
|
5
|
5
|
3
|
5
|
4
|
5
|
4
|
3
|
5
|
3
|
4
|
2
|
5
|
5
|
5
|
5
|
5
|
3
|
3
|
27
|
27
|
26
|
31
|
0.7530831
|
|
3
|
3
|
4
|
4
|
5
|
3
|
5
|
5
|
5
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
4
|
5
|
5
|
5
|
5
|
3
|
4
|
22
|
28
|
30
|
31
|
-0.9689718
|
|
5
|
4
|
5
|
5
|
4
|
4
|
5
|
5
|
5
|
4
|
5
|
5
|
3
|
4
|
4
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
3
|
27
|
29
|
27
|
30
|
-1.9852273
|
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
4
|
5
|
3
|
4
|
5
|
4
|
4
|
4
|
5
|
3
|
5
|
24
|
24
|
27
|
30
|
1.7200560
|
|
4
|
4
|
4
|
5
|
4
|
4
|
3
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
3
|
4
|
3
|
4
|
3
|
4
|
3
|
3
|
4
|
4
|
4
|
25
|
23
|
25
|
25
|
-1.9827029
|
jarque.bera.test(df$RES_1)
Jarque Bera Test
data: df$RES_1
X-squared = 12.808, df = 2, p-value = 0.001655
df <- subset(df, select = -c(X1, X2, X3, Y,RES_1,ABS_RESID))
head(df)
A tibble: 6 × 31
|
X1.1
|
X1.2
|
X1.3
|
X1.4
|
X1.5
|
X1.6
|
X2.1
|
X2.2
|
X2.3
|
X2.4
|
X2.5
|
X2.6
|
X3.1
|
X3.2
|
X3.3
|
X3.4
|
X3.5
|
X3.6
|
X3.7
|
Y.1
|
Y.2
|
Y.3
|
Y.4
|
Y.5
|
Y.6
|
Y.7
|
X1
|
X2
|
X3
|
Y
|
resid
|
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
|
5
|
5
|
4
|
3
|
5
|
3
|
5
|
5
|
5
|
5
|
4
|
4
|
4
|
5
|
4
|
5
|
4
|
3
|
5
|
5
|
5
|
5
|
5
|
4
|
4
|
5
|
25
|
28
|
30
|
33
|
0.8668151
|
|
5
|
5
|
5
|
5
|
5
|
2
|
5
|
5
|
5
|
3
|
5
|
4
|
5
|
4
|
3
|
5
|
3
|
4
|
2
|
5
|
5
|
5
|
5
|
5
|
3
|
3
|
27
|
27
|
26
|
31
|
0.7530831
|
|
3
|
3
|
4
|
4
|
5
|
3
|
5
|
5
|
5
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
4
|
5
|
5
|
5
|
5
|
3
|
4
|
22
|
28
|
30
|
31
|
-0.9689718
|
|
5
|
4
|
5
|
5
|
4
|
4
|
5
|
5
|
5
|
4
|
5
|
5
|
3
|
4
|
4
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
3
|
27
|
29
|
27
|
30
|
-1.9852273
|
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
4
|
5
|
3
|
4
|
5
|
4
|
4
|
4
|
5
|
3
|
5
|
24
|
24
|
27
|
30
|
1.7200560
|
|
4
|
4
|
4
|
5
|
4
|
4
|
3
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
3
|
4
|
3
|
4
|
3
|
4
|
3
|
3
|
4
|
4
|
4
|
25
|
23
|
25
|
25
|
-1.9827029
|
#konversi factor
df <- data.frame(lapply(df, function(x) as.numeric(as.character(x))))
df <- df %>% as_tibble() %>%
mutate(X1 = rowSums(across(starts_with('X1.'))),
X2 = rowSums(across(starts_with('X2.'))),
X3 = rowSums(across(starts_with('X3.'))),
Y = rowSums(across(starts_with('Y.'))))
head(df)
A tibble: 6 × 31
|
X1.1
|
X1.2
|
X1.3
|
X1.4
|
X1.5
|
X1.6
|
X2.1
|
X2.2
|
X2.3
|
X2.4
|
X2.5
|
X2.6
|
X3.1
|
X3.2
|
X3.3
|
X3.4
|
X3.5
|
X3.6
|
X3.7
|
Y.1
|
Y.2
|
Y.3
|
Y.4
|
Y.5
|
Y.6
|
Y.7
|
X1
|
X2
|
X3
|
Y
|
resid
|
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
|
5
|
5
|
4
|
3
|
5
|
3
|
5
|
5
|
5
|
5
|
4
|
4
|
4
|
5
|
4
|
5
|
4
|
3
|
5
|
5
|
5
|
5
|
5
|
4
|
4
|
5
|
25
|
28
|
30
|
33
|
0.8668151
|
|
5
|
5
|
5
|
5
|
5
|
2
|
5
|
5
|
5
|
3
|
5
|
4
|
5
|
4
|
3
|
5
|
3
|
4
|
2
|
5
|
5
|
5
|
5
|
5
|
3
|
3
|
27
|
27
|
26
|
31
|
0.7530831
|
|
3
|
3
|
4
|
4
|
5
|
3
|
5
|
5
|
5
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
4
|
5
|
5
|
5
|
5
|
3
|
4
|
22
|
28
|
30
|
31
|
-0.9689718
|
|
5
|
4
|
5
|
5
|
4
|
4
|
5
|
5
|
5
|
4
|
5
|
5
|
3
|
4
|
4
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
3
|
27
|
29
|
27
|
30
|
-1.9852273
|
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
4
|
5
|
3
|
4
|
5
|
4
|
4
|
4
|
5
|
3
|
5
|
24
|
24
|
27
|
30
|
1.7200560
|
|
4
|
4
|
4
|
5
|
4
|
4
|
3
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
3
|
4
|
3
|
4
|
3
|
4
|
3
|
3
|
4
|
4
|
4
|
25
|
23
|
25
|
25
|
-1.9827029
|
summary(df)
X1.1 X1.2 X1.3 X1.4 X1.5
Min. :3.00 Min. :2.00 Min. :2.00 Min. :2.00 Min. :2.00
1st Qu.:4.00 1st Qu.:4.00 1st Qu.:4.00 1st Qu.:4.00 1st Qu.:4.00
Median :4.00 Median :4.00 Median :4.00 Median :4.00 Median :4.00
Mean :4.19 Mean :4.14 Mean :4.03 Mean :4.19 Mean :4.12
3rd Qu.:5.00 3rd Qu.:5.00 3rd Qu.:5.00 3rd Qu.:5.00 3rd Qu.:5.00
Max. :5.00 Max. :5.00 Max. :5.00 Max. :5.00 Max. :5.00
X1.6 X2.1 X2.2 X2.3 X2.4
Min. :1.00 Min. :2.00 Min. :2.00 Min. :1.00 Min. :2.00
1st Qu.:2.00 1st Qu.:3.00 1st Qu.:4.00 1st Qu.:3.00 1st Qu.:3.00
Median :3.00 Median :4.00 Median :4.00 Median :4.00 Median :4.00
Mean :2.88 Mean :3.96 Mean :4.03 Mean :3.94 Mean :3.67
3rd Qu.:4.00 3rd Qu.:5.00 3rd Qu.:5.00 3rd Qu.:4.00 3rd Qu.:4.00
Max. :5.00 Max. :5.00 Max. :5.00 Max. :5.00 Max. :5.00
X2.5 X2.6 X3.1 X3.2 X3.3
Min. :2.00 Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.0
1st Qu.:3.00 1st Qu.:3.00 1st Qu.:3.00 1st Qu.:3.00 1st Qu.:3.0
Median :4.00 Median :4.00 Median :4.00 Median :4.00 Median :4.0
Mean :3.93 Mean :3.76 Mean :3.73 Mean :3.69 Mean :3.7
3rd Qu.:4.00 3rd Qu.:4.00 3rd Qu.:4.00 3rd Qu.:4.00 3rd Qu.:4.0
Max. :5.00 Max. :5.00 Max. :5.00 Max. :5.00 Max. :5.0
X3.4 X3.5 X3.6 X3.7 Y.1
Min. :1.00 Min. :1.00 Min. :1.00 Min. :1.00 Min. :2.0
1st Qu.:3.00 1st Qu.:3.00 1st Qu.:3.00 1st Qu.:3.00 1st Qu.:4.0
Median :4.00 Median :4.00 Median :4.00 Median :3.00 Median :4.0
Mean :3.92 Mean :3.53 Mean :3.57 Mean :3.33 Mean :4.1
3rd Qu.:5.00 3rd Qu.:4.00 3rd Qu.:4.00 3rd Qu.:4.00 3rd Qu.:5.0
Max. :5.00 Max. :5.00 Max. :5.00 Max. :5.00 Max. :5.0
Y.2 Y.3 Y.4 Y.5 Y.6
Min. :1.0 Min. :1.00 Min. :1.00 Min. :1.0 Min. :1.00
1st Qu.:4.0 1st Qu.:4.00 1st Qu.:3.00 1st Qu.:3.0 1st Qu.:3.00
Median :4.0 Median :4.00 Median :4.00 Median :4.0 Median :4.00
Mean :4.1 Mean :4.15 Mean :3.73 Mean :3.8 Mean :3.54
3rd Qu.:5.0 3rd Qu.:5.00 3rd Qu.:4.00 3rd Qu.:4.0 3rd Qu.:4.00
Max. :5.0 Max. :5.00 Max. :5.00 Max. :5.0 Max. :5.00
Y.7 X1 X2 X3 Y
Min. :1.00 Min. :18.00 Min. :13.00 Min. :11.00 Min. : 8.00
1st Qu.:3.00 1st Qu.:22.00 1st Qu.:20.00 1st Qu.:22.75 1st Qu.:25.00
Median :4.00 Median :24.00 Median :24.00 Median :26.00 Median :28.00
Mean :3.84 Mean :23.55 Mean :23.29 Mean :25.47 Mean :27.26
3rd Qu.:4.00 3rd Qu.:26.00 3rd Qu.:26.00 3rd Qu.:28.00 3rd Qu.:30.25
Max. :5.00 Max. :30.00 Max. :30.00 Max. :35.00 Max. :35.00
head(df)
A tibble: 6 × 31
|
X1.1
|
X1.2
|
X1.3
|
X1.4
|
X1.5
|
X1.6
|
X2.1
|
X2.2
|
X2.3
|
X2.4
|
X2.5
|
X2.6
|
X3.1
|
X3.2
|
X3.3
|
X3.4
|
X3.5
|
X3.6
|
X3.7
|
Y.1
|
Y.2
|
Y.3
|
Y.4
|
Y.5
|
Y.6
|
Y.7
|
X1
|
X2
|
X3
|
Y
|
resid
|
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
|
5
|
5
|
4
|
3
|
5
|
3
|
5
|
5
|
5
|
5
|
4
|
4
|
4
|
5
|
4
|
5
|
4
|
3
|
5
|
5
|
5
|
5
|
5
|
4
|
4
|
5
|
25
|
28
|
30
|
33
|
0.8668151
|
|
5
|
5
|
5
|
5
|
5
|
2
|
5
|
5
|
5
|
3
|
5
|
4
|
5
|
4
|
3
|
5
|
3
|
4
|
2
|
5
|
5
|
5
|
5
|
5
|
3
|
3
|
27
|
27
|
26
|
31
|
0.7530831
|
|
3
|
3
|
4
|
4
|
5
|
3
|
5
|
5
|
5
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
4
|
5
|
5
|
5
|
5
|
3
|
4
|
22
|
28
|
30
|
31
|
-0.9689718
|
|
5
|
4
|
5
|
5
|
4
|
4
|
5
|
5
|
5
|
4
|
5
|
5
|
3
|
4
|
4
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
3
|
27
|
29
|
27
|
30
|
-1.9852273
|
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
4
|
5
|
3
|
4
|
5
|
4
|
4
|
4
|
5
|
3
|
5
|
24
|
24
|
27
|
30
|
1.7200560
|
|
4
|
4
|
4
|
5
|
4
|
4
|
3
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
3
|
4
|
3
|
4
|
3
|
4
|
3
|
3
|
4
|
4
|
4
|
25
|
23
|
25
|
25
|
-1.9827029
|
model <- lm(Y~X1 + X2 + X3, data=df)
summary(model)
Call:
lm(formula = Y ~ X1 + X2 + X3, data = df)
Residuals:
Min 1Q Median 3Q Max
-9.4924 -1.4966 -0.1058 1.3958 7.9596
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.27828 2.37469 0.538 0.592
X1 0.05474 0.13387 0.409 0.684
X2 0.70821 0.10777 6.571 2.58e-09 ***
X3 0.32188 0.07748 4.155 7.07e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.714 on 96 degrees of freedom
Multiple R-squared: 0.6969, Adjusted R-squared: 0.6874
F-statistic: 73.56 on 3 and 96 DF, p-value: < 2.2e-16
df$resid <- model$residuals
hist(df$resid)
png
boxplot(df$resid)
png
jarque.bera.test(df$resid)
Jarque Bera Test
data: df$resid
X-squared = 11.609, df = 2, p-value = 0.003014
library(tidyverse)
df %>%
mutate(id = row_number()) %>%
pivot_longer(
cols = c("X1","X2", "X3", "Y"),
names_to = "names",
values_to = "values"
) %>%
ggplot(aes(x=names, y=values, fill=names))+
geom_boxplot() +
geom_jitter(aes(y=values))
png
df %>%
mutate(id = row_number()) %>%
pivot_longer(
cols = c("X1", "X2", "X3", "Y"),
names_to = "names",
values_to = "values"
) %>%
ggplot(aes(x = names, y = values, fill = names)) +
geom_boxplot() +
geom_text(aes(label = values), vjust = -0.5) +
labs(x = "Names", y = "Values") +
theme_minimal()
png
library(GGally)
Scatter_Matrix <- ggpairs(df,columns = c("X1", "X2", "X3", "Y"),
title = "Scatter Plot Matrix for mtcars Dataset",
axisLabels = "show")
Scatter_Matrix
png
df1 <- subset(df, Y > 17)
df1 %>%
mutate(id = row_number()) %>%
pivot_longer(
cols = c("X1", "X2", "X3", "Y"),
names_to = "names",
values_to = "values"
) %>%
ggplot(aes(x = names, y = values, fill = names)) +
geom_boxplot() +
geom_text(aes(label = values), vjust = -0.5) +
labs(x = "Names", y = "Values") +
theme_minimal()
png
model1 <- lm(Y~X1 + X2 + X3, data=df1)
summary(model1)
df1$resid <- model1$residuals
jarque.bera.test(df1$resid)
Call:
lm(formula = Y ~ X1 + X2 + X3, data = df1)
Residuals:
Min 1Q Median 3Q Max
-9.3288 -1.4411 -0.0829 1.3043 7.2201
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.88598 2.38262 1.631 0.10632
X1 0.14441 0.12837 1.125 0.26356
X2 0.58255 0.10583 5.505 3.32e-07 ***
X3 0.26016 0.07657 3.398 0.00101 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.54 on 92 degrees of freedom
Multiple R-squared: 0.609, Adjusted R-squared: 0.5962
F-statistic: 47.76 on 3 and 92 DF, p-value: < 2.2e-16
Jarque Bera Test
data: df1$resid
X-squared = 11.284, df = 2, p-value = 0.003546
hist(df1$resid)
png
##tampaaknya ini semakin buruk, saya akan menggunakan model pertama
##lebih baik gunakan metode bootstrap atau robust standard error
#sebagai catatan, tidak normal tidak menyebabkan bias
#kita tetap dapat menggunakan ols asalakan sampelnya cukup besar, setidaknya lebih dari 30. Koefisien cenderung konsisten
#calculate robust standard errors for model coefficients
coeftest(model, vcov = vcovHC(model, type = 'HC1'))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.278278 2.017627 0.6336 0.5278778
X1 0.054738 0.142547 0.3840 0.7018293
X2 0.708214 0.129614 5.4640 3.664e-07 ***
X3 0.321882 0.085661 3.7576 0.0002944 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(model)
Call:
lm(formula = Y ~ X1 + X2 + X3, data = df)
Residuals:
Min 1Q Median 3Q Max
-9.4924 -1.4966 -0.1058 1.3958 7.9596
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.27828 2.37469 0.538 0.592
X1 0.05474 0.13387 0.409 0.684
X2 0.70821 0.10777 6.571 2.58e-09 ***
X3 0.32188 0.07748 4.155 7.07e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.714 on 96 degrees of freedom
Multiple R-squared: 0.6969, Adjusted R-squared: 0.6874
F-statistic: 73.56 on 3 and 96 DF, p-value: < 2.2e-16
#koefisien masih sama dan masih signifikan
install.packages("stargazer")
The downloaded binary packages are in
/var/folders/bp/bl32ppld457g5vsgb7f3qn6r0000gn/T//RtmpMiV3I9/downloaded_packages
library(stargazer)
stargazer(model, model1, title="Results", type = "text")
Results
=================================================================
Dependent variable:
---------------------------------------------
Y
(1) (2)
-----------------------------------------------------------------
X1 0.055 0.144
(0.134) (0.128)
X2 0.708*** 0.583***
(0.108) (0.106)
X3 0.322*** 0.260***
(0.077) (0.077)
Constant 1.278 3.886
(2.375) (2.383)
-----------------------------------------------------------------
Observations 100 96
R2 0.697 0.609
Adjusted R2 0.687 0.596
Residual Std. Error 2.714 (df = 96) 2.540 (df = 92)
F Statistic 73.562*** (df = 3; 96) 47.760*** (df = 3; 92)
=================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
str(df)
tibble [100 × 31] (S3: tbl_df/tbl/data.frame)
$ X1.1 : num [1:100] 5 5 3 5 4 4 5 4 5 5 ...
$ X1.2 : num [1:100] 5 5 3 4 4 4 5 4 4 5 ...
$ X1.3 : num [1:100] 4 5 4 5 4 4 5 4 4 5 ...
$ X1.4 : num [1:100] 3 5 4 5 4 5 4 4 5 5 ...
$ X1.5 : num [1:100] 5 5 5 4 4 4 5 4 5 5 ...
$ X1.6 : num [1:100] 3 2 3 4 4 4 4 2 3 1 ...
$ X2.1 : num [1:100] 5 5 5 5 4 3 4 3 4 4 ...
$ X2.2 : num [1:100] 5 5 5 5 4 4 5 4 4 5 ...
$ X2.3 : num [1:100] 5 5 5 5 4 4 4 3 4 4 ...
$ X2.4 : num [1:100] 5 3 5 4 4 4 5 3 4 4 ...
$ X2.5 : num [1:100] 4 5 4 5 4 4 5 3 5 5 ...
$ X2.6 : num [1:100] 4 4 4 5 4 4 5 3 5 4 ...
$ X3.1 : num [1:100] 4 5 3 3 3 3 5 4 5 5 ...
$ X3.2 : num [1:100] 5 4 5 4 4 4 5 4 5 5 ...
$ X3.3 : num [1:100] 4 3 5 4 4 4 4 4 5 5 ...
$ X3.4 : num [1:100] 5 5 5 5 4 3 5 4 5 5 ...
$ X3.5 : num [1:100] 4 3 4 4 5 4 5 3 4 4 ...
$ X3.6 : num [1:100] 3 4 4 4 3 3 5 4 4 5 ...
$ X3.7 : num [1:100] 5 2 4 3 4 4 3 4 4 4 ...
$ Y.1 : num [1:100] 5 5 4 5 5 3 5 4 5 4 ...
$ Y.2 : num [1:100] 5 5 5 5 4 4 5 4 4 5 ...
$ Y.3 : num [1:100] 5 5 5 5 4 3 5 4 4 5 ...
$ Y.4 : num [1:100] 5 5 5 4 4 3 5 4 3 5 ...
$ Y.5 : num [1:100] 4 5 5 4 5 4 5 4 4 5 ...
$ Y.6 : num [1:100] 4 3 3 4 3 4 4 4 3 5 ...
$ Y.7 : num [1:100] 5 3 4 3 5 4 4 4 4 5 ...
$ X1 : num [1:100] 25 27 22 27 24 25 28 22 26 26 ...
$ X2 : num [1:100] 28 27 28 29 24 23 28 19 26 26 ...
$ X3 : num [1:100] 30 26 30 27 27 25 32 27 32 33 ...
$ Y : num [1:100] 33 31 31 30 30 25 33 28 27 34 ...
$ resid: Named num [1:100] 0.867 0.753 -0.969 -1.985 1.72 ...
..- attr(*, "names")= chr [1:100] "1" "2" "3" "4" ...
#validity instrumen
df_validity <- df %>% as_tibble() %>%
mutate(sumX1 = rowSums(across(starts_with('X1.'))),
sumX2 = rowSums(across(starts_with('X2.'))),
sumX3 = rowSums(across(starts_with('X3.'))),
sumY = rowSums(across(starts_with('Y.'))))
head(df_validity)
A tibble: 6 × 35
|
X1.1
|
X1.2
|
X1.3
|
X1.4
|
X1.5
|
X1.6
|
X2.1
|
X2.2
|
X2.3
|
X2.4
|
X2.5
|
X2.6
|
X3.1
|
X3.2
|
X3.3
|
X3.4
|
X3.5
|
X3.6
|
X3.7
|
Y.1
|
Y.2
|
Y.3
|
Y.4
|
Y.5
|
Y.6
|
Y.7
|
X1
|
X2
|
X3
|
Y
|
resid
|
sumX1
|
sumX2
|
sumX3
|
sumY
|
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
|
5
|
5
|
4
|
3
|
5
|
3
|
5
|
5
|
5
|
5
|
4
|
4
|
4
|
5
|
4
|
5
|
4
|
3
|
5
|
5
|
5
|
5
|
5
|
4
|
4
|
5
|
25
|
28
|
30
|
33
|
0.8668151
|
25
|
28
|
30
|
33
|
|
5
|
5
|
5
|
5
|
5
|
2
|
5
|
5
|
5
|
3
|
5
|
4
|
5
|
4
|
3
|
5
|
3
|
4
|
2
|
5
|
5
|
5
|
5
|
5
|
3
|
3
|
27
|
27
|
26
|
31
|
0.7530831
|
27
|
27
|
26
|
31
|
|
3
|
3
|
4
|
4
|
5
|
3
|
5
|
5
|
5
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
4
|
5
|
5
|
5
|
5
|
3
|
4
|
22
|
28
|
30
|
31
|
-0.9689718
|
22
|
28
|
30
|
31
|
|
5
|
4
|
5
|
5
|
4
|
4
|
5
|
5
|
5
|
4
|
5
|
5
|
3
|
4
|
4
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
3
|
27
|
29
|
27
|
30
|
-1.9852273
|
27
|
29
|
27
|
30
|
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
4
|
5
|
3
|
4
|
5
|
4
|
4
|
4
|
5
|
3
|
5
|
24
|
24
|
27
|
30
|
1.7200560
|
24
|
24
|
27
|
30
|
|
4
|
4
|
4
|
5
|
4
|
4
|
3
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
3
|
4
|
3
|
4
|
3
|
4
|
3
|
3
|
4
|
4
|
4
|
25
|
23
|
25
|
25
|
-1.9827029
|
25
|
23
|
25
|
25
|
X1_instrumen <- df_validity[, grepl("X1", names(df_validity))]
X2_instrumen <- df_validity[, grepl("X2", names(df_validity))]
X3_instrumen <- df_validity[, grepl("X3", names(df_validity))]
Y_instrumen <- df_validity[, grepl("Y", names(df_validity))]
library(Hmisc) # You need to download it first.
rcorr(as.matrix(X1_instrumen), type="pearson")
X1.1 X1.2 X1.3 X1.4 X1.5 X1.6 X1 sumX1
X1.1 1.00 0.61 0.55 0.39 0.46 -0.16 0.73 0.73
X1.2 0.61 1.00 0.55 0.47 0.46 -0.28 0.72 0.72
X1.3 0.55 0.55 1.00 0.52 0.56 -0.20 0.76 0.76
X1.4 0.39 0.47 0.52 1.00 0.59 -0.21 0.71 0.71
X1.5 0.46 0.46 0.56 0.59 1.00 -0.30 0.69 0.69
X1.6 -0.16 -0.28 -0.20 -0.21 -0.30 1.00 0.10 0.10
X1 0.73 0.72 0.76 0.71 0.69 0.10 1.00 1.00
sumX1 0.73 0.72 0.76 0.71 0.69 0.10 1.00 1.00
n= 100
P
X1.1 X1.2 X1.3 X1.4 X1.5 X1.6 X1 sumX1
X1.1 0.0000 0.0000 0.0000 0.0000 0.1082 0.0000 0.0000
X1.2 0.0000 0.0000 0.0000 0.0000 0.0054 0.0000 0.0000
X1.3 0.0000 0.0000 0.0000 0.0000 0.0509 0.0000 0.0000
X1.4 0.0000 0.0000 0.0000 0.0000 0.0333 0.0000 0.0000
X1.5 0.0000 0.0000 0.0000 0.0000 0.0023 0.0000 0.0000
X1.6 0.1082 0.0054 0.0509 0.0333 0.0023 0.3275 0.3275
X1 0.0000 0.0000 0.0000 0.0000 0.0000 0.3275 0.0000
sumX1 0.0000 0.0000 0.0000 0.0000 0.0000 0.3275 0.0000
#hapus x6
rcorr(as.matrix(X2_instrumen), type="pearson")
X2.1 X2.2 X2.3 X2.4 X2.5 X2.6 X2 sumX2
X2.1 1.00 0.57 0.69 0.50 0.52 0.57 0.80 0.80
X2.2 0.57 1.00 0.57 0.48 0.65 0.52 0.78 0.78
X2.3 0.69 0.57 1.00 0.61 0.59 0.64 0.85 0.85
X2.4 0.50 0.48 0.61 1.00 0.56 0.66 0.79 0.79
X2.5 0.52 0.65 0.59 0.56 1.00 0.63 0.81 0.81
X2.6 0.57 0.52 0.64 0.66 0.63 1.00 0.83 0.83
X2 0.80 0.78 0.85 0.79 0.81 0.83 1.00 1.00
sumX2 0.80 0.78 0.85 0.79 0.81 0.83 1.00 1.00
n= 100
P
X2.1 X2.2 X2.3 X2.4 X2.5 X2.6 X2 sumX2
X2.1 0 0 0 0 0 0 0
X2.2 0 0 0 0 0 0 0
X2.3 0 0 0 0 0 0 0
X2.4 0 0 0 0 0 0 0
X2.5 0 0 0 0 0 0 0
X2.6 0 0 0 0 0 0 0
X2 0 0 0 0 0 0 0
sumX2 0 0 0 0 0 0 0
rcorr(as.matrix(X3_instrumen), type="pearson")
X3.1 X3.2 X3.3 X3.4 X3.5 X3.6 X3.7 X3 sumX3
X3.1 1.00 0.68 0.63 0.43 0.50 0.53 0.10 0.75 0.75
X3.2 0.68 1.00 0.77 0.51 0.55 0.63 0.05 0.81 0.81
X3.3 0.63 0.77 1.00 0.50 0.65 0.71 0.14 0.85 0.85
X3.4 0.43 0.51 0.50 1.00 0.53 0.49 0.25 0.73 0.73
X3.5 0.50 0.55 0.65 0.53 1.00 0.66 0.20 0.80 0.80
X3.6 0.53 0.63 0.71 0.49 0.66 1.00 0.22 0.82 0.82
X3.7 0.10 0.05 0.14 0.25 0.20 0.22 1.00 0.39 0.39
X3 0.75 0.81 0.85 0.73 0.80 0.82 0.39 1.00 1.00
sumX3 0.75 0.81 0.85 0.73 0.80 0.82 0.39 1.00 1.00
n= 100
P
X3.1 X3.2 X3.3 X3.4 X3.5 X3.6 X3.7 X3 sumX3
X3.1 0.0000 0.0000 0.0000 0.0000 0.0000 0.3425 0.0000 0.0000
X3.2 0.0000 0.0000 0.0000 0.0000 0.0000 0.6523 0.0000 0.0000
X3.3 0.0000 0.0000 0.0000 0.0000 0.0000 0.1541 0.0000 0.0000
X3.4 0.0000 0.0000 0.0000 0.0000 0.0000 0.0123 0.0000 0.0000
X3.5 0.0000 0.0000 0.0000 0.0000 0.0000 0.0484 0.0000 0.0000
X3.6 0.0000 0.0000 0.0000 0.0000 0.0000 0.0255 0.0000 0.0000
X3.7 0.3425 0.6523 0.1541 0.0123 0.0484 0.0255 0.0000 0.0000
X3 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
sumX3 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
rcorr(as.matrix(Y_instrumen), type="pearson")
Y.1 Y.2 Y.3 Y.4 Y.5 Y.6 Y.7 Y sumY
Y.1 1.00 0.79 0.55 0.46 0.41 0.30 0.36 0.71 0.71
Y.2 0.79 1.00 0.65 0.46 0.50 0.34 0.45 0.77 0.77
Y.3 0.55 0.65 1.00 0.42 0.58 0.51 0.56 0.80 0.80
Y.4 0.46 0.46 0.42 1.00 0.49 0.45 0.47 0.71 0.71
Y.5 0.41 0.50 0.58 0.49 1.00 0.67 0.74 0.82 0.82
Y.6 0.30 0.34 0.51 0.45 0.67 1.00 0.63 0.74 0.74
Y.7 0.36 0.45 0.56 0.47 0.74 0.63 1.00 0.79 0.79
Y 0.71 0.77 0.80 0.71 0.82 0.74 0.79 1.00 1.00
sumY 0.71 0.77 0.80 0.71 0.82 0.74 0.79 1.00 1.00
n= 100
P
Y.1 Y.2 Y.3 Y.4 Y.5 Y.6 Y.7 Y sumY
Y.1 0.0000 0.0000 0.0000 0.0000 0.0027 0.0003 0.0000 0.0000
Y.2 0.0000 0.0000 0.0000 0.0000 0.0005 0.0000 0.0000 0.0000
Y.3 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
Y.4 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
Y.5 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
Y.6 0.0027 0.0005 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
Y.7 0.0003 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
Y 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
sumY 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
#hapus X1.6 saja
df_clean <- subset(df_validity, select = -c(X1.6, X1, X2, X3, Y, resid, sumX1, sumX2, sumX3, sumY))
head(df_clean)
A tibble: 6 × 30
|
X1.1
|
X1.2
|
X1.3
|
X1.4
|
X1.5
|
X2.1
|
X2.2
|
X2.3
|
X2.4
|
X2.5
|
X2.6
|
X3.1
|
X3.2
|
X3.3
|
X3.4
|
X3.5
|
X3.6
|
X3.7
|
Y.1
|
Y.2
|
Y.3
|
Y.4
|
Y.5
|
Y.6
|
Y.7
|
X1
|
X2
|
X3
|
Y
|
resid
|
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
|
5
|
5
|
4
|
3
|
5
|
5
|
5
|
5
|
5
|
4
|
4
|
4
|
5
|
4
|
5
|
4
|
3
|
5
|
5
|
5
|
5
|
5
|
4
|
4
|
5
|
22
|
28
|
30
|
33
|
0.9228734
|
|
5
|
5
|
5
|
5
|
5
|
5
|
5
|
5
|
3
|
5
|
4
|
5
|
4
|
3
|
5
|
3
|
4
|
2
|
5
|
5
|
5
|
5
|
5
|
3
|
3
|
25
|
27
|
26
|
31
|
0.6098644
|
|
3
|
3
|
4
|
4
|
5
|
5
|
5
|
5
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
4
|
5
|
5
|
5
|
5
|
3
|
4
|
19
|
28
|
30
|
31
|
-0.7990978
|
|
5
|
4
|
5
|
5
|
4
|
5
|
5
|
5
|
4
|
5
|
5
|
3
|
4
|
4
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
3
|
23
|
29
|
27
|
30
|
-1.9035415
|
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
4
|
5
|
3
|
4
|
5
|
4
|
4
|
4
|
5
|
3
|
5
|
20
|
24
|
27
|
30
|
1.8244974
|
|
4
|
4
|
4
|
5
|
4
|
3
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
3
|
4
|
3
|
4
|
3
|
4
|
3
|
3
|
4
|
4
|
4
|
21
|
23
|
25
|
25
|
-1.9406680
|
df_clean <- df_clean %>% as_tibble() %>%
mutate(X1 = rowSums(across(starts_with('X1.'))),
X2 = rowSums(across(starts_with('X2.'))),
X3 = rowSums(across(starts_with('X3.'))),
Y = rowSums(across(starts_with('Y.'))))
head(df_clean)
A tibble: 6 × 30
|
X1.1
|
X1.2
|
X1.3
|
X1.4
|
X1.5
|
X2.1
|
X2.2
|
X2.3
|
X2.4
|
X2.5
|
X2.6
|
X3.1
|
X3.2
|
X3.3
|
X3.4
|
X3.5
|
X3.6
|
X3.7
|
Y.1
|
Y.2
|
Y.3
|
Y.4
|
Y.5
|
Y.6
|
Y.7
|
X1
|
X2
|
X3
|
Y
|
resid
|
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
<dbl>
|
|
5
|
5
|
4
|
3
|
5
|
5
|
5
|
5
|
5
|
4
|
4
|
4
|
5
|
4
|
5
|
4
|
3
|
5
|
5
|
5
|
5
|
5
|
4
|
4
|
5
|
22
|
28
|
30
|
33
|
0.9228734
|
|
5
|
5
|
5
|
5
|
5
|
5
|
5
|
5
|
3
|
5
|
4
|
5
|
4
|
3
|
5
|
3
|
4
|
2
|
5
|
5
|
5
|
5
|
5
|
3
|
3
|
25
|
27
|
26
|
31
|
0.6098644
|
|
3
|
3
|
4
|
4
|
5
|
5
|
5
|
5
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
4
|
5
|
5
|
5
|
5
|
3
|
4
|
19
|
28
|
30
|
31
|
-0.7990978
|
|
5
|
4
|
5
|
5
|
4
|
5
|
5
|
5
|
4
|
5
|
5
|
3
|
4
|
4
|
5
|
4
|
4
|
3
|
5
|
5
|
5
|
4
|
4
|
4
|
3
|
23
|
29
|
27
|
30
|
-1.9035415
|
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
4
|
5
|
3
|
4
|
5
|
4
|
4
|
4
|
5
|
3
|
5
|
20
|
24
|
27
|
30
|
1.8244974
|
|
4
|
4
|
4
|
5
|
4
|
3
|
4
|
4
|
4
|
4
|
4
|
3
|
4
|
4
|
3
|
4
|
3
|
4
|
3
|
4
|
3
|
3
|
4
|
4
|
4
|
21
|
23
|
25
|
25
|
-1.9406680
|
#model clean
model_clean <- lm(Y~X1 + X2 + X3, data=df_clean)
summary(model_clean)
Call:
lm(formula = Y ~ X1 + X2 + X3, data = df_clean)
Residuals:
Min 1Q Median 3Q Max
-9.494 -1.411 -0.107 1.429 7.819
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.15556 2.05610 0.562 0.575
X1 0.09268 0.13165 0.704 0.483
X2 0.69000 0.10964 6.293 9.25e-09 ***
X3 0.31875 0.07725 4.126 7.86e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.71 on 96 degrees of freedom
Multiple R-squared: 0.6979, Adjusted R-squared: 0.6885
F-statistic: 73.92 on 3 and 96 DF, p-value: < 2.2e-16
df_clean$resid <- model_clean$residuals
hist(df_clean$resid)
png
coeftest(model_clean, vcov = vcovHC(model_clean, type = 'HC1'))
t test of coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.155559 1.745136 0.6622 0.5094553
X1 0.092676 0.160897 0.5760 0.5659658
X2 0.690002 0.143027 4.8243 5.281e-06 ***
X3 0.318754 0.086487 3.6856 0.0003777 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
stargazer(model, model1, model_clean, title="Results", type = "text")
Results
========================================================================================
Dependent variable:
--------------------------------------------------------------------
Y
(1) (2) (3)
----------------------------------------------------------------------------------------
X1 0.055 0.144 0.093
(0.134) (0.128) (0.132)
X2 0.708*** 0.583*** 0.690***
(0.108) (0.106) (0.110)
X3 0.322*** 0.260*** 0.319***
(0.077) (0.077) (0.077)
Constant 1.278 3.886 1.156
(2.375) (2.383) (2.056)
----------------------------------------------------------------------------------------
Observations 100 96 100
R2 0.697 0.609 0.698
Adjusted R2 0.687 0.596 0.688
Residual Std. Error 2.714 (df = 96) 2.540 (df = 92) 2.710 (df = 96)
F Statistic 73.562*** (df = 3; 96) 47.760*** (df = 3; 92) 73.923*** (df = 3; 96)
========================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
robust_model <- sqrt(diag(vcovHC(model, type = "HC1")))
robust_model1 <- sqrt(diag(vcovHC(model1, type = "HC1")))
robust_model_clean <- sqrt(diag(vcovHC(model_clean, type = "HC1")))
stargazer(model, model1, model_clean, title="Results", type = "text", se = list(robust_model, robust_model1, robust_model_clean))
Results
========================================================================================
Dependent variable:
--------------------------------------------------------------------
Y
(1) (2) (3)
----------------------------------------------------------------------------------------
X1 0.055 0.144 0.093
(0.143) (0.137) (0.161)
X2 0.708*** 0.583*** 0.690***
(0.130) (0.124) (0.143)
X3 0.322*** 0.260*** 0.319***
(0.086) (0.082) (0.086)
Constant 1.278 3.886* 1.156
(2.018) (2.046) (1.745)
----------------------------------------------------------------------------------------
Observations 100 96 100
R2 0.697 0.609 0.698
Adjusted R2 0.687 0.596 0.688
Residual Std. Error 2.714 (df = 96) 2.540 (df = 92) 2.710 (df = 96)
F Statistic 73.562*** (df = 3; 96) 47.760*** (df = 3; 92) 73.923*** (df = 3; 96)
========================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01