rm(list = ls())
library(foreign)
library("gplots")
library(ggplot2)
library("dplyr")
library(tidyverse)
library(gganimate)
library(tseries)
library(plm)
library(lmtest)
library(sandwich)
library(lmtest)
#coint test
library(pco)
library(memisc)
options(repr.matrix.max.cols=50, repr.matrix.max.rows=100)
df <- data.frame(as.data.set(spss.system.file("/Users/macbookairm1/Documents/BUKU R/KS2.sav")))
File character set is 'UTF-8'.

Converting character set to UTF-8.
head(df)
A tibble: 6 × 31
X1.1 X1.2 X1.3 X1.4 X1.5 X1.6 X2.1 X2.2 X2.3 X2.4 X2.5 X2.6 X3.1 X3.2 X3.3 X3.4 X3.5 X3.6 X3.7 Y.1 Y.2 Y.3 Y.4 Y.5 Y.6 Y.7 X1 X2 X3 Y resid
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
5 5 4 3 5 3 5 5 5 5 4 4 4 5 4 5 4 3 5 5 5 5 5 4 4 5 25 28 30 33 0.8668151
5 5 5 5 5 2 5 5 5 3 5 4 5 4 3 5 3 4 2 5 5 5 5 5 3 3 27 27 26 31 0.7530831
3 3 4 4 5 3 5 5 5 5 4 4 3 5 5 5 4 4 4 4 5 5 5 5 3 4 22 28 30 31 -0.9689718
5 4 5 5 4 4 5 5 5 4 5 5 3 4 4 5 4 4 3 5 5 5 4 4 4 3 27 29 27 30 -1.9852273
4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 5 3 4 5 4 4 4 5 3 5 24 24 27 30 1.7200560
4 4 4 5 4 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 3 3 4 4 4 25 23 25 25 -1.9827029
jarque.bera.test(df$RES_1)
    Jarque Bera Test

data:  df$RES_1
X-squared = 12.808, df = 2, p-value = 0.001655
df <- subset(df, select = -c(X1, X2, X3, Y,RES_1,ABS_RESID))
head(df)
A tibble: 6 × 31
X1.1 X1.2 X1.3 X1.4 X1.5 X1.6 X2.1 X2.2 X2.3 X2.4 X2.5 X2.6 X3.1 X3.2 X3.3 X3.4 X3.5 X3.6 X3.7 Y.1 Y.2 Y.3 Y.4 Y.5 Y.6 Y.7 X1 X2 X3 Y resid
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
5 5 4 3 5 3 5 5 5 5 4 4 4 5 4 5 4 3 5 5 5 5 5 4 4 5 25 28 30 33 0.8668151
5 5 5 5 5 2 5 5 5 3 5 4 5 4 3 5 3 4 2 5 5 5 5 5 3 3 27 27 26 31 0.7530831
3 3 4 4 5 3 5 5 5 5 4 4 3 5 5 5 4 4 4 4 5 5 5 5 3 4 22 28 30 31 -0.9689718
5 4 5 5 4 4 5 5 5 4 5 5 3 4 4 5 4 4 3 5 5 5 4 4 4 3 27 29 27 30 -1.9852273
4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 5 3 4 5 4 4 4 5 3 5 24 24 27 30 1.7200560
4 4 4 5 4 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 3 3 4 4 4 25 23 25 25 -1.9827029
#konversi factor
df <- data.frame(lapply(df, function(x) as.numeric(as.character(x))))
df <- df %>% as_tibble() %>% 
   mutate(X1 = rowSums(across(starts_with('X1.'))),
         X2 = rowSums(across(starts_with('X2.'))),
         X3 = rowSums(across(starts_with('X3.'))),
         Y = rowSums(across(starts_with('Y.'))))
head(df)
A tibble: 6 × 31
X1.1 X1.2 X1.3 X1.4 X1.5 X1.6 X2.1 X2.2 X2.3 X2.4 X2.5 X2.6 X3.1 X3.2 X3.3 X3.4 X3.5 X3.6 X3.7 Y.1 Y.2 Y.3 Y.4 Y.5 Y.6 Y.7 X1 X2 X3 Y resid
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
5 5 4 3 5 3 5 5 5 5 4 4 4 5 4 5 4 3 5 5 5 5 5 4 4 5 25 28 30 33 0.8668151
5 5 5 5 5 2 5 5 5 3 5 4 5 4 3 5 3 4 2 5 5 5 5 5 3 3 27 27 26 31 0.7530831
3 3 4 4 5 3 5 5 5 5 4 4 3 5 5 5 4 4 4 4 5 5 5 5 3 4 22 28 30 31 -0.9689718
5 4 5 5 4 4 5 5 5 4 5 5 3 4 4 5 4 4 3 5 5 5 4 4 4 3 27 29 27 30 -1.9852273
4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 5 3 4 5 4 4 4 5 3 5 24 24 27 30 1.7200560
4 4 4 5 4 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 3 3 4 4 4 25 23 25 25 -1.9827029
summary(df)
      X1.1           X1.2           X1.3           X1.4           X1.5     
 Min.   :3.00   Min.   :2.00   Min.   :2.00   Min.   :2.00   Min.   :2.00  
 1st Qu.:4.00   1st Qu.:4.00   1st Qu.:4.00   1st Qu.:4.00   1st Qu.:4.00  
 Median :4.00   Median :4.00   Median :4.00   Median :4.00   Median :4.00  
 Mean   :4.19   Mean   :4.14   Mean   :4.03   Mean   :4.19   Mean   :4.12  
 3rd Qu.:5.00   3rd Qu.:5.00   3rd Qu.:5.00   3rd Qu.:5.00   3rd Qu.:5.00  
 Max.   :5.00   Max.   :5.00   Max.   :5.00   Max.   :5.00   Max.   :5.00  
      X1.6           X2.1           X2.2           X2.3           X2.4     
 Min.   :1.00   Min.   :2.00   Min.   :2.00   Min.   :1.00   Min.   :2.00  
 1st Qu.:2.00   1st Qu.:3.00   1st Qu.:4.00   1st Qu.:3.00   1st Qu.:3.00  
 Median :3.00   Median :4.00   Median :4.00   Median :4.00   Median :4.00  
 Mean   :2.88   Mean   :3.96   Mean   :4.03   Mean   :3.94   Mean   :3.67  
 3rd Qu.:4.00   3rd Qu.:5.00   3rd Qu.:5.00   3rd Qu.:4.00   3rd Qu.:4.00  
 Max.   :5.00   Max.   :5.00   Max.   :5.00   Max.   :5.00   Max.   :5.00  
      X2.5           X2.6           X3.1           X3.2           X3.3    
 Min.   :2.00   Min.   :1.00   Min.   :1.00   Min.   :1.00   Min.   :1.0  
 1st Qu.:3.00   1st Qu.:3.00   1st Qu.:3.00   1st Qu.:3.00   1st Qu.:3.0  
 Median :4.00   Median :4.00   Median :4.00   Median :4.00   Median :4.0  
 Mean   :3.93   Mean   :3.76   Mean   :3.73   Mean   :3.69   Mean   :3.7  
 3rd Qu.:4.00   3rd Qu.:4.00   3rd Qu.:4.00   3rd Qu.:4.00   3rd Qu.:4.0  
 Max.   :5.00   Max.   :5.00   Max.   :5.00   Max.   :5.00   Max.   :5.0  
      X3.4           X3.5           X3.6           X3.7           Y.1     
 Min.   :1.00   Min.   :1.00   Min.   :1.00   Min.   :1.00   Min.   :2.0  
 1st Qu.:3.00   1st Qu.:3.00   1st Qu.:3.00   1st Qu.:3.00   1st Qu.:4.0  
 Median :4.00   Median :4.00   Median :4.00   Median :3.00   Median :4.0  
 Mean   :3.92   Mean   :3.53   Mean   :3.57   Mean   :3.33   Mean   :4.1  
 3rd Qu.:5.00   3rd Qu.:4.00   3rd Qu.:4.00   3rd Qu.:4.00   3rd Qu.:5.0  
 Max.   :5.00   Max.   :5.00   Max.   :5.00   Max.   :5.00   Max.   :5.0  
      Y.2           Y.3            Y.4            Y.5           Y.6      
 Min.   :1.0   Min.   :1.00   Min.   :1.00   Min.   :1.0   Min.   :1.00  
 1st Qu.:4.0   1st Qu.:4.00   1st Qu.:3.00   1st Qu.:3.0   1st Qu.:3.00  
 Median :4.0   Median :4.00   Median :4.00   Median :4.0   Median :4.00  
 Mean   :4.1   Mean   :4.15   Mean   :3.73   Mean   :3.8   Mean   :3.54  
 3rd Qu.:5.0   3rd Qu.:5.00   3rd Qu.:4.00   3rd Qu.:4.0   3rd Qu.:4.00  
 Max.   :5.0   Max.   :5.00   Max.   :5.00   Max.   :5.0   Max.   :5.00  
      Y.7             X1              X2              X3              Y        
 Min.   :1.00   Min.   :18.00   Min.   :13.00   Min.   :11.00   Min.   : 8.00  
 1st Qu.:3.00   1st Qu.:22.00   1st Qu.:20.00   1st Qu.:22.75   1st Qu.:25.00  
 Median :4.00   Median :24.00   Median :24.00   Median :26.00   Median :28.00  
 Mean   :3.84   Mean   :23.55   Mean   :23.29   Mean   :25.47   Mean   :27.26  
 3rd Qu.:4.00   3rd Qu.:26.00   3rd Qu.:26.00   3rd Qu.:28.00   3rd Qu.:30.25  
 Max.   :5.00   Max.   :30.00   Max.   :30.00   Max.   :35.00   Max.   :35.00  
head(df)
A tibble: 6 × 31
X1.1 X1.2 X1.3 X1.4 X1.5 X1.6 X2.1 X2.2 X2.3 X2.4 X2.5 X2.6 X3.1 X3.2 X3.3 X3.4 X3.5 X3.6 X3.7 Y.1 Y.2 Y.3 Y.4 Y.5 Y.6 Y.7 X1 X2 X3 Y resid
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
5 5 4 3 5 3 5 5 5 5 4 4 4 5 4 5 4 3 5 5 5 5 5 4 4 5 25 28 30 33 0.8668151
5 5 5 5 5 2 5 5 5 3 5 4 5 4 3 5 3 4 2 5 5 5 5 5 3 3 27 27 26 31 0.7530831
3 3 4 4 5 3 5 5 5 5 4 4 3 5 5 5 4 4 4 4 5 5 5 5 3 4 22 28 30 31 -0.9689718
5 4 5 5 4 4 5 5 5 4 5 5 3 4 4 5 4 4 3 5 5 5 4 4 4 3 27 29 27 30 -1.9852273
4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 5 3 4 5 4 4 4 5 3 5 24 24 27 30 1.7200560
4 4 4 5 4 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 3 3 4 4 4 25 23 25 25 -1.9827029
model <- lm(Y~X1 + X2 + X3, data=df)
summary(model)
Call:
lm(formula = Y ~ X1 + X2 + X3, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-9.4924 -1.4966 -0.1058  1.3958  7.9596 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.27828    2.37469   0.538    0.592    
X1           0.05474    0.13387   0.409    0.684    
X2           0.70821    0.10777   6.571 2.58e-09 ***
X3           0.32188    0.07748   4.155 7.07e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.714 on 96 degrees of freedom
Multiple R-squared:  0.6969,    Adjusted R-squared:  0.6874 
F-statistic: 73.56 on 3 and 96 DF,  p-value: < 2.2e-16
df$resid <- model$residuals
hist(df$resid)
png
png
boxplot(df$resid)
png
png
jarque.bera.test(df$resid)
    Jarque Bera Test

data:  df$resid
X-squared = 11.609, df = 2, p-value = 0.003014
library(tidyverse)
df %>% 
    mutate(id = row_number()) %>% 
    pivot_longer(
        cols = c("X1","X2", "X3", "Y"),
        names_to = "names",
        values_to = "values"
    ) %>% 
    ggplot(aes(x=names, y=values, fill=names))+
    geom_boxplot() +
    geom_jitter(aes(y=values))
png
png
df %>%
  mutate(id = row_number()) %>%
  pivot_longer(
    cols = c("X1", "X2", "X3", "Y"),
    names_to = "names",
    values_to = "values"
  ) %>%
  ggplot(aes(x = names, y = values, fill = names)) +
  geom_boxplot() +
  geom_text(aes(label = values), vjust = -0.5) +
  labs(x = "Names", y = "Values") +
  theme_minimal()
png
png
library(GGally) 
Scatter_Matrix <- ggpairs(df,columns = c("X1", "X2", "X3", "Y"), 
                          title = "Scatter Plot Matrix for mtcars Dataset", 
                          axisLabels = "show") 
Scatter_Matrix
png
png
df1 <- subset(df, Y  > 17)
df1 %>%
  mutate(id = row_number()) %>%
  pivot_longer(
    cols = c("X1", "X2", "X3", "Y"),
    names_to = "names",
    values_to = "values"
  ) %>%
  ggplot(aes(x = names, y = values, fill = names)) +
  geom_boxplot() +
  geom_text(aes(label = values), vjust = -0.5) +
  labs(x = "Names", y = "Values") +
  theme_minimal()
png
png
model1 <- lm(Y~X1 + X2 + X3, data=df1)
summary(model1)
df1$resid <- model1$residuals

jarque.bera.test(df1$resid)
Call:
lm(formula = Y ~ X1 + X2 + X3, data = df1)

Residuals:
    Min      1Q  Median      3Q     Max 
-9.3288 -1.4411 -0.0829  1.3043  7.2201 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  3.88598    2.38262   1.631  0.10632    
X1           0.14441    0.12837   1.125  0.26356    
X2           0.58255    0.10583   5.505 3.32e-07 ***
X3           0.26016    0.07657   3.398  0.00101 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.54 on 92 degrees of freedom
Multiple R-squared:  0.609, Adjusted R-squared:  0.5962 
F-statistic: 47.76 on 3 and 92 DF,  p-value: < 2.2e-16





    Jarque Bera Test

data:  df1$resid
X-squared = 11.284, df = 2, p-value = 0.003546
hist(df1$resid)
png
png
##tampaaknya ini semakin buruk, saya akan menggunakan model pertama
##lebih baik gunakan metode bootstrap atau robust standard error
#sebagai catatan, tidak normal tidak menyebabkan bias
#kita tetap dapat menggunakan ols asalakan sampelnya cukup besar, setidaknya lebih dari 30. Koefisien cenderung konsisten
#calculate robust standard errors for model coefficients
coeftest(model, vcov = vcovHC(model, type = 'HC1'))
t test of coefficients:

            Estimate Std. Error t value  Pr(>|t|)    
(Intercept) 1.278278   2.017627  0.6336 0.5278778    
X1          0.054738   0.142547  0.3840 0.7018293    
X2          0.708214   0.129614  5.4640 3.664e-07 ***
X3          0.321882   0.085661  3.7576 0.0002944 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
summary(model)
Call:
lm(formula = Y ~ X1 + X2 + X3, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-9.4924 -1.4966 -0.1058  1.3958  7.9596 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.27828    2.37469   0.538    0.592    
X1           0.05474    0.13387   0.409    0.684    
X2           0.70821    0.10777   6.571 2.58e-09 ***
X3           0.32188    0.07748   4.155 7.07e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.714 on 96 degrees of freedom
Multiple R-squared:  0.6969,    Adjusted R-squared:  0.6874 
F-statistic: 73.56 on 3 and 96 DF,  p-value: < 2.2e-16
#koefisien masih sama dan masih signifikan
install.packages("stargazer")
The downloaded binary packages are in
    /var/folders/bp/bl32ppld457g5vsgb7f3qn6r0000gn/T//RtmpMiV3I9/downloaded_packages
library(stargazer)
stargazer(model, model1, title="Results", type = "text")
Results
=================================================================
                                 Dependent variable:             
                    ---------------------------------------------
                                          Y                      
                             (1)                    (2)          
-----------------------------------------------------------------
X1                          0.055                  0.144         
                           (0.134)                (0.128)        
                                                                 
X2                         0.708***               0.583***       
                           (0.108)                (0.106)        
                                                                 
X3                         0.322***               0.260***       
                           (0.077)                (0.077)        
                                                                 
Constant                    1.278                  3.886         
                           (2.375)                (2.383)        
                                                                 
-----------------------------------------------------------------
Observations                 100                     96          
R2                          0.697                  0.609         
Adjusted R2                 0.687                  0.596         
Residual Std. Error    2.714 (df = 96)        2.540 (df = 92)    
F Statistic         73.562*** (df = 3; 96) 47.760*** (df = 3; 92)
=================================================================
Note:                                 *p<0.1; **p<0.05; ***p<0.01
str(df)
tibble [100 × 31] (S3: tbl_df/tbl/data.frame)
 $ X1.1 : num [1:100] 5 5 3 5 4 4 5 4 5 5 ...
 $ X1.2 : num [1:100] 5 5 3 4 4 4 5 4 4 5 ...
 $ X1.3 : num [1:100] 4 5 4 5 4 4 5 4 4 5 ...
 $ X1.4 : num [1:100] 3 5 4 5 4 5 4 4 5 5 ...
 $ X1.5 : num [1:100] 5 5 5 4 4 4 5 4 5 5 ...
 $ X1.6 : num [1:100] 3 2 3 4 4 4 4 2 3 1 ...
 $ X2.1 : num [1:100] 5 5 5 5 4 3 4 3 4 4 ...
 $ X2.2 : num [1:100] 5 5 5 5 4 4 5 4 4 5 ...
 $ X2.3 : num [1:100] 5 5 5 5 4 4 4 3 4 4 ...
 $ X2.4 : num [1:100] 5 3 5 4 4 4 5 3 4 4 ...
 $ X2.5 : num [1:100] 4 5 4 5 4 4 5 3 5 5 ...
 $ X2.6 : num [1:100] 4 4 4 5 4 4 5 3 5 4 ...
 $ X3.1 : num [1:100] 4 5 3 3 3 3 5 4 5 5 ...
 $ X3.2 : num [1:100] 5 4 5 4 4 4 5 4 5 5 ...
 $ X3.3 : num [1:100] 4 3 5 4 4 4 4 4 5 5 ...
 $ X3.4 : num [1:100] 5 5 5 5 4 3 5 4 5 5 ...
 $ X3.5 : num [1:100] 4 3 4 4 5 4 5 3 4 4 ...
 $ X3.6 : num [1:100] 3 4 4 4 3 3 5 4 4 5 ...
 $ X3.7 : num [1:100] 5 2 4 3 4 4 3 4 4 4 ...
 $ Y.1  : num [1:100] 5 5 4 5 5 3 5 4 5 4 ...
 $ Y.2  : num [1:100] 5 5 5 5 4 4 5 4 4 5 ...
 $ Y.3  : num [1:100] 5 5 5 5 4 3 5 4 4 5 ...
 $ Y.4  : num [1:100] 5 5 5 4 4 3 5 4 3 5 ...
 $ Y.5  : num [1:100] 4 5 5 4 5 4 5 4 4 5 ...
 $ Y.6  : num [1:100] 4 3 3 4 3 4 4 4 3 5 ...
 $ Y.7  : num [1:100] 5 3 4 3 5 4 4 4 4 5 ...
 $ X1   : num [1:100] 25 27 22 27 24 25 28 22 26 26 ...
 $ X2   : num [1:100] 28 27 28 29 24 23 28 19 26 26 ...
 $ X3   : num [1:100] 30 26 30 27 27 25 32 27 32 33 ...
 $ Y    : num [1:100] 33 31 31 30 30 25 33 28 27 34 ...
 $ resid: Named num [1:100] 0.867 0.753 -0.969 -1.985 1.72 ...
  ..- attr(*, "names")= chr [1:100] "1" "2" "3" "4" ...
#validity instrumen
df_validity <- df %>% as_tibble() %>% 
   mutate(sumX1 = rowSums(across(starts_with('X1.'))),
         sumX2 = rowSums(across(starts_with('X2.'))),
         sumX3 = rowSums(across(starts_with('X3.'))),
         sumY = rowSums(across(starts_with('Y.'))))
head(df_validity)
A tibble: 6 × 35
X1.1 X1.2 X1.3 X1.4 X1.5 X1.6 X2.1 X2.2 X2.3 X2.4 X2.5 X2.6 X3.1 X3.2 X3.3 X3.4 X3.5 X3.6 X3.7 Y.1 Y.2 Y.3 Y.4 Y.5 Y.6 Y.7 X1 X2 X3 Y resid sumX1 sumX2 sumX3 sumY
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
5 5 4 3 5 3 5 5 5 5 4 4 4 5 4 5 4 3 5 5 5 5 5 4 4 5 25 28 30 33 0.8668151 25 28 30 33
5 5 5 5 5 2 5 5 5 3 5 4 5 4 3 5 3 4 2 5 5 5 5 5 3 3 27 27 26 31 0.7530831 27 27 26 31
3 3 4 4 5 3 5 5 5 5 4 4 3 5 5 5 4 4 4 4 5 5 5 5 3 4 22 28 30 31 -0.9689718 22 28 30 31
5 4 5 5 4 4 5 5 5 4 5 5 3 4 4 5 4 4 3 5 5 5 4 4 4 3 27 29 27 30 -1.9852273 27 29 27 30
4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 5 3 4 5 4 4 4 5 3 5 24 24 27 30 1.7200560 24 24 27 30
4 4 4 5 4 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 3 3 4 4 4 25 23 25 25 -1.9827029 25 23 25 25
X1_instrumen <- df_validity[, grepl("X1", names(df_validity))]
X2_instrumen <- df_validity[, grepl("X2", names(df_validity))]
X3_instrumen <- df_validity[, grepl("X3", names(df_validity))]
Y_instrumen <- df_validity[, grepl("Y", names(df_validity))]
library(Hmisc) # You need to download it first.
rcorr(as.matrix(X1_instrumen), type="pearson")
       X1.1  X1.2  X1.3  X1.4  X1.5  X1.6   X1 sumX1
X1.1   1.00  0.61  0.55  0.39  0.46 -0.16 0.73  0.73
X1.2   0.61  1.00  0.55  0.47  0.46 -0.28 0.72  0.72
X1.3   0.55  0.55  1.00  0.52  0.56 -0.20 0.76  0.76
X1.4   0.39  0.47  0.52  1.00  0.59 -0.21 0.71  0.71
X1.5   0.46  0.46  0.56  0.59  1.00 -0.30 0.69  0.69
X1.6  -0.16 -0.28 -0.20 -0.21 -0.30  1.00 0.10  0.10
X1     0.73  0.72  0.76  0.71  0.69  0.10 1.00  1.00
sumX1  0.73  0.72  0.76  0.71  0.69  0.10 1.00  1.00

n= 100 


P
      X1.1   X1.2   X1.3   X1.4   X1.5   X1.6   X1     sumX1 
X1.1         0.0000 0.0000 0.0000 0.0000 0.1082 0.0000 0.0000
X1.2  0.0000        0.0000 0.0000 0.0000 0.0054 0.0000 0.0000
X1.3  0.0000 0.0000        0.0000 0.0000 0.0509 0.0000 0.0000
X1.4  0.0000 0.0000 0.0000        0.0000 0.0333 0.0000 0.0000
X1.5  0.0000 0.0000 0.0000 0.0000        0.0023 0.0000 0.0000
X1.6  0.1082 0.0054 0.0509 0.0333 0.0023        0.3275 0.3275
X1    0.0000 0.0000 0.0000 0.0000 0.0000 0.3275        0.0000
sumX1 0.0000 0.0000 0.0000 0.0000 0.0000 0.3275 0.0000       
#hapus x6
rcorr(as.matrix(X2_instrumen), type="pearson")
      X2.1 X2.2 X2.3 X2.4 X2.5 X2.6   X2 sumX2
X2.1  1.00 0.57 0.69 0.50 0.52 0.57 0.80  0.80
X2.2  0.57 1.00 0.57 0.48 0.65 0.52 0.78  0.78
X2.3  0.69 0.57 1.00 0.61 0.59 0.64 0.85  0.85
X2.4  0.50 0.48 0.61 1.00 0.56 0.66 0.79  0.79
X2.5  0.52 0.65 0.59 0.56 1.00 0.63 0.81  0.81
X2.6  0.57 0.52 0.64 0.66 0.63 1.00 0.83  0.83
X2    0.80 0.78 0.85 0.79 0.81 0.83 1.00  1.00
sumX2 0.80 0.78 0.85 0.79 0.81 0.83 1.00  1.00

n= 100 


P
      X2.1 X2.2 X2.3 X2.4 X2.5 X2.6 X2 sumX2
X2.1        0    0    0    0    0    0  0   
X2.2   0         0    0    0    0    0  0   
X2.3   0    0         0    0    0    0  0   
X2.4   0    0    0         0    0    0  0   
X2.5   0    0    0    0         0    0  0   
X2.6   0    0    0    0    0         0  0   
X2     0    0    0    0    0    0       0   
sumX2  0    0    0    0    0    0    0      
rcorr(as.matrix(X3_instrumen), type="pearson")
      X3.1 X3.2 X3.3 X3.4 X3.5 X3.6 X3.7   X3 sumX3
X3.1  1.00 0.68 0.63 0.43 0.50 0.53 0.10 0.75  0.75
X3.2  0.68 1.00 0.77 0.51 0.55 0.63 0.05 0.81  0.81
X3.3  0.63 0.77 1.00 0.50 0.65 0.71 0.14 0.85  0.85
X3.4  0.43 0.51 0.50 1.00 0.53 0.49 0.25 0.73  0.73
X3.5  0.50 0.55 0.65 0.53 1.00 0.66 0.20 0.80  0.80
X3.6  0.53 0.63 0.71 0.49 0.66 1.00 0.22 0.82  0.82
X3.7  0.10 0.05 0.14 0.25 0.20 0.22 1.00 0.39  0.39
X3    0.75 0.81 0.85 0.73 0.80 0.82 0.39 1.00  1.00
sumX3 0.75 0.81 0.85 0.73 0.80 0.82 0.39 1.00  1.00

n= 100 


P
      X3.1   X3.2   X3.3   X3.4   X3.5   X3.6   X3.7   X3     sumX3 
X3.1         0.0000 0.0000 0.0000 0.0000 0.0000 0.3425 0.0000 0.0000
X3.2  0.0000        0.0000 0.0000 0.0000 0.0000 0.6523 0.0000 0.0000
X3.3  0.0000 0.0000        0.0000 0.0000 0.0000 0.1541 0.0000 0.0000
X3.4  0.0000 0.0000 0.0000        0.0000 0.0000 0.0123 0.0000 0.0000
X3.5  0.0000 0.0000 0.0000 0.0000        0.0000 0.0484 0.0000 0.0000
X3.6  0.0000 0.0000 0.0000 0.0000 0.0000        0.0255 0.0000 0.0000
X3.7  0.3425 0.6523 0.1541 0.0123 0.0484 0.0255        0.0000 0.0000
X3    0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000        0.0000
sumX3 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000       
rcorr(as.matrix(Y_instrumen), type="pearson")
      Y.1  Y.2  Y.3  Y.4  Y.5  Y.6  Y.7    Y sumY
Y.1  1.00 0.79 0.55 0.46 0.41 0.30 0.36 0.71 0.71
Y.2  0.79 1.00 0.65 0.46 0.50 0.34 0.45 0.77 0.77
Y.3  0.55 0.65 1.00 0.42 0.58 0.51 0.56 0.80 0.80
Y.4  0.46 0.46 0.42 1.00 0.49 0.45 0.47 0.71 0.71
Y.5  0.41 0.50 0.58 0.49 1.00 0.67 0.74 0.82 0.82
Y.6  0.30 0.34 0.51 0.45 0.67 1.00 0.63 0.74 0.74
Y.7  0.36 0.45 0.56 0.47 0.74 0.63 1.00 0.79 0.79
Y    0.71 0.77 0.80 0.71 0.82 0.74 0.79 1.00 1.00
sumY 0.71 0.77 0.80 0.71 0.82 0.74 0.79 1.00 1.00

n= 100 


P
     Y.1    Y.2    Y.3    Y.4    Y.5    Y.6    Y.7    Y      sumY  
Y.1         0.0000 0.0000 0.0000 0.0000 0.0027 0.0003 0.0000 0.0000
Y.2  0.0000        0.0000 0.0000 0.0000 0.0005 0.0000 0.0000 0.0000
Y.3  0.0000 0.0000        0.0000 0.0000 0.0000 0.0000 0.0000 0.0000
Y.4  0.0000 0.0000 0.0000        0.0000 0.0000 0.0000 0.0000 0.0000
Y.5  0.0000 0.0000 0.0000 0.0000        0.0000 0.0000 0.0000 0.0000
Y.6  0.0027 0.0005 0.0000 0.0000 0.0000        0.0000 0.0000 0.0000
Y.7  0.0003 0.0000 0.0000 0.0000 0.0000 0.0000        0.0000 0.0000
Y    0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000        0.0000
sumY 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000       
#hapus X1.6 saja
df_clean <- subset(df_validity, select = -c(X1.6, X1, X2, X3, Y, resid, sumX1, sumX2, sumX3, sumY))
head(df_clean)
A tibble: 6 × 30
X1.1 X1.2 X1.3 X1.4 X1.5 X2.1 X2.2 X2.3 X2.4 X2.5 X2.6 X3.1 X3.2 X3.3 X3.4 X3.5 X3.6 X3.7 Y.1 Y.2 Y.3 Y.4 Y.5 Y.6 Y.7 X1 X2 X3 Y resid
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
5 5 4 3 5 5 5 5 5 4 4 4 5 4 5 4 3 5 5 5 5 5 4 4 5 22 28 30 33 0.9228734
5 5 5 5 5 5 5 5 3 5 4 5 4 3 5 3 4 2 5 5 5 5 5 3 3 25 27 26 31 0.6098644
3 3 4 4 5 5 5 5 5 4 4 3 5 5 5 4 4 4 4 5 5 5 5 3 4 19 28 30 31 -0.7990978
5 4 5 5 4 5 5 5 4 5 5 3 4 4 5 4 4 3 5 5 5 4 4 4 3 23 29 27 30 -1.9035415
4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 5 3 4 5 4 4 4 5 3 5 20 24 27 30 1.8244974
4 4 4 5 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 3 3 4 4 4 21 23 25 25 -1.9406680
df_clean <- df_clean %>% as_tibble() %>% 
   mutate(X1 = rowSums(across(starts_with('X1.'))),
         X2 = rowSums(across(starts_with('X2.'))),
         X3 = rowSums(across(starts_with('X3.'))),
         Y = rowSums(across(starts_with('Y.'))))
head(df_clean)
A tibble: 6 × 30
X1.1 X1.2 X1.3 X1.4 X1.5 X2.1 X2.2 X2.3 X2.4 X2.5 X2.6 X3.1 X3.2 X3.3 X3.4 X3.5 X3.6 X3.7 Y.1 Y.2 Y.3 Y.4 Y.5 Y.6 Y.7 X1 X2 X3 Y resid
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
5 5 4 3 5 5 5 5 5 4 4 4 5 4 5 4 3 5 5 5 5 5 4 4 5 22 28 30 33 0.9228734
5 5 5 5 5 5 5 5 3 5 4 5 4 3 5 3 4 2 5 5 5 5 5 3 3 25 27 26 31 0.6098644
3 3 4 4 5 5 5 5 5 4 4 3 5 5 5 4 4 4 4 5 5 5 5 3 4 19 28 30 31 -0.7990978
5 4 5 5 4 5 5 5 4 5 5 3 4 4 5 4 4 3 5 5 5 4 4 4 3 23 29 27 30 -1.9035415
4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 5 3 4 5 4 4 4 5 3 5 20 24 27 30 1.8244974
4 4 4 5 4 3 4 4 4 4 4 3 4 4 3 4 3 4 3 4 3 3 4 4 4 21 23 25 25 -1.9406680
#model clean
model_clean <- lm(Y~X1 + X2 + X3, data=df_clean)
summary(model_clean)
Call:
lm(formula = Y ~ X1 + X2 + X3, data = df_clean)

Residuals:
   Min     1Q Median     3Q    Max 
-9.494 -1.411 -0.107  1.429  7.819 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.15556    2.05610   0.562    0.575    
X1           0.09268    0.13165   0.704    0.483    
X2           0.69000    0.10964   6.293 9.25e-09 ***
X3           0.31875    0.07725   4.126 7.86e-05 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 2.71 on 96 degrees of freedom
Multiple R-squared:  0.6979,    Adjusted R-squared:  0.6885 
F-statistic: 73.92 on 3 and 96 DF,  p-value: < 2.2e-16
df_clean$resid <- model_clean$residuals
hist(df_clean$resid)
png
png
coeftest(model_clean, vcov = vcovHC(model_clean, type = 'HC1'))
t test of coefficients:

            Estimate Std. Error t value  Pr(>|t|)    
(Intercept) 1.155559   1.745136  0.6622 0.5094553    
X1          0.092676   0.160897  0.5760 0.5659658    
X2          0.690002   0.143027  4.8243 5.281e-06 ***
X3          0.318754   0.086487  3.6856 0.0003777 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
stargazer(model, model1, model_clean,  title="Results", type = "text")
Results
========================================================================================
                                            Dependent variable:                         
                    --------------------------------------------------------------------
                                                     Y                                  
                             (1)                    (2)                    (3)          
----------------------------------------------------------------------------------------
X1                          0.055                  0.144                  0.093         
                           (0.134)                (0.128)                (0.132)        
                                                                                        
X2                         0.708***               0.583***               0.690***       
                           (0.108)                (0.106)                (0.110)        
                                                                                        
X3                         0.322***               0.260***               0.319***       
                           (0.077)                (0.077)                (0.077)        
                                                                                        
Constant                    1.278                  3.886                  1.156         
                           (2.375)                (2.383)                (2.056)        
                                                                                        
----------------------------------------------------------------------------------------
Observations                 100                     96                    100          
R2                          0.697                  0.609                  0.698         
Adjusted R2                 0.687                  0.596                  0.688         
Residual Std. Error    2.714 (df = 96)        2.540 (df = 92)        2.710 (df = 96)    
F Statistic         73.562*** (df = 3; 96) 47.760*** (df = 3; 92) 73.923*** (df = 3; 96)
========================================================================================
Note:                                                        *p<0.1; **p<0.05; ***p<0.01
robust_model <- sqrt(diag(vcovHC(model, type = "HC1")))
robust_model1 <- sqrt(diag(vcovHC(model1, type = "HC1")))
robust_model_clean <- sqrt(diag(vcovHC(model_clean, type = "HC1")))

stargazer(model, model1, model_clean,  title="Results", type = "text", se = list(robust_model, robust_model1, robust_model_clean))
Results
========================================================================================
                                            Dependent variable:                         
                    --------------------------------------------------------------------
                                                     Y                                  
                             (1)                    (2)                    (3)          
----------------------------------------------------------------------------------------
X1                          0.055                  0.144                  0.093         
                           (0.143)                (0.137)                (0.161)        
                                                                                        
X2                         0.708***               0.583***               0.690***       
                           (0.130)                (0.124)                (0.143)        
                                                                                        
X3                         0.322***               0.260***               0.319***       
                           (0.086)                (0.082)                (0.086)        
                                                                                        
Constant                    1.278                  3.886*                 1.156         
                           (2.018)                (2.046)                (1.745)        
                                                                                        
----------------------------------------------------------------------------------------
Observations                 100                     96                    100          
R2                          0.697                  0.609                  0.698         
Adjusted R2                 0.687                  0.596                  0.688         
Residual Std. Error    2.714 (df = 96)        2.540 (df = 92)        2.710 (df = 96)    
F Statistic         73.562*** (df = 3; 96) 47.760*** (df = 3; 92) 73.923*** (df = 3; 96)
========================================================================================
Note:                                                        *p<0.1; **p<0.05; ***p<0.01