Analisis Regresi Berganda (multiple regression)

Multikolinearitas terindikasi jika nilai VIF > 10

Data 1

# ============================== DATA: Number of Cases ==============================
# Fits y on x1 and x2, inspects pairwise correlations, runs a partial F test
# for x2, and checks the variance inflation factors of the full model.
library(readxl)
antar <- read_excel(path = "E:\\Praktikum-5.xlsx", sheet = "Sheet1")

# Matrix of everything except the first two columns of the sheet.
X <- as.matrix(antar[, -(1:2)])
# Regression model (full model with both predictors)
regresi1 <- lm(formula = y ~ x1 + x2, data = antar)
# Inspect the correlation matrix (first column of the sheet is left out)
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
chart.Correlation(antar[, -1], histogram = FALSE, pch = 19)

# Partial F test: the reduced model drops x2, so the comparison measures
# the contribution of x2 given that x1 is already in the model.
reduced_model1 <- lm(formula = y ~ x1, data = antar)
anova(regresi1, reduced_model1)
## Analysis of Variance Table
## 
## Model 1: y ~ x1 + x2
## Model 2: y ~ x1
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     22 233.73                                  
## 2     23 402.13 -1    -168.4 15.851 0.0006312 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Detect multicollinearity via variance inflation factors
library(car)
## Loading required package: carData
vif(regresi1)
##       x1       x2 
## 3.118474 3.118474

Data 2

# ==================================== DATA: EEO ====================================
# Regresses ACHV on FAM, PEER and SCHOOL, tests PEER and SCHOOL jointly with a
# partial F test, and examines collinearity among the three predictors.
# Load the data
eeo <- read_excel(path = "E:\\praktikum 7 EEO.xlsx", sheet = "data")
eeo
## # A tibble: 70 x 5
##      ROW   ACHV    FAM    PEER  SCHOOL
##    <dbl>  <dbl>  <dbl>   <dbl>   <dbl>
##  1     1 -0.431  0.608  0.0351  0.166 
##  2     2  0.800  0.794  0.479   0.534 
##  3     3 -0.925 -0.826 -0.620  -0.786 
##  4     4 -2.19  -1.25  -1.22   -1.04  
##  5     5 -2.85   0.174 -0.185   0.142 
##  6     6 -0.662  0.202  0.128   0.273 
##  7     7  2.64   0.242 -0.0902  0.0497
##  8     8  2.36   0.594  0.218   0.519 
##  9     9 -0.913 -0.616 -0.490  -0.632 
## 10    10  0.594  0.994  0.622   0.934 
## # ... with 60 more rows
# Predictor matrix: drop ROW and ACHV, keeping FAM, PEER and SCHOOL.
XX <- as.matrix(eeo[, -(1:2)])
# Inspect the correlation matrix (ROW is left out)
library(PerformanceAnalytics)
chart.Correlation(eeo[, -1], histogram = FALSE, pch = 19)

# Multiple regression model (full model)
regresi2 <- lm(formula = ACHV ~ FAM + PEER + SCHOOL, data = eeo)
# Partial F test
# The full model is the very same fit as regresi2, so it is reused here.
full_model <- regresi2
# Reduced model keeps only FAM, to assess the joint effect of PEER and SCHOOL.
reduced_model2 <- lm(formula = ACHV ~ FAM, data = eeo)
anova(full_model, reduced_model2)
## Analysis of Variance Table
## 
## Model 1: ACHV ~ FAM + PEER + SCHOOL
## Model 2: ACHV ~ FAM
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     66 282.87                           
## 2     68 293.68 -2   -10.803 1.2603 0.2903
summary(regresi2) # summary of the full model
## 
## Call:
## lm(formula = ACHV ~ FAM + PEER + SCHOOL, data = eeo)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.2096 -1.3934 -0.2947  1.1415  4.5881 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.06996    0.25064  -0.279    0.781
## FAM          1.10126    1.41056   0.781    0.438
## PEER         2.32206    1.48129   1.568    0.122
## SCHOOL      -2.28100    2.22045  -1.027    0.308
## 
## Residual standard error: 2.07 on 66 degrees of freedom
## Multiple R-squared:  0.2063, Adjusted R-squared:  0.1702 
## F-statistic: 5.717 on 3 and 66 DF,  p-value: 0.001535
# Critical F value at the 5% level: regression df = k, residual df = n - p
# (3 and 66, matching the F-statistic line of the summary).
Ftabel <- qf(p = 0.95, df1 = 3, df2 = 66)
Ftabel
## [1] 2.743711
# Detect multicollinearity
# (vif() is provided by the car package attached earlier in this script)
vif(regresi2)
##      FAM     PEER   SCHOOL 
## 37.58064 30.21166 83.15544
cor(XX)
##              FAM      PEER    SCHOOL
## FAM    1.0000000 0.9600806 0.9856837
## PEER   0.9600806 1.0000000 0.9821601
## SCHOOL 0.9856837 0.9821601 1.0000000
# Eigen-decomposition of the cross-product matrix X'X of the predictors.
aa <- t(XX) %*% XX
eigen(aa)
## eigen() decomposition
## $values
## [1] 209.3295610   2.7303093   0.5833274
## 
## $vectors
##           [,1]       [,2]       [,3]
## [1,] 0.6174223  0.6698093 -0.4124865
## [2,] 0.5243779 -0.7413256 -0.4188844
## [3,] 0.5863595 -0.0423298  0.8089442

Data 3

# =============================== DATA: Salary Survey ===============================
# Regresses S on X plus the categorical predictors E and M, then changes the
# reference level of E and refits so the dummy coding matches the intended one.
# Load the data
salary <- read_excel(
  path = "E:\\Praktikum 7 salary survey.xlsx",
  sheet = "Sheet1"
)
# Treat E and M as categorical so lm() builds dummy variables for them.
salary$E <- as.factor(salary$E)
salary$M <- as.factor(salary$M)
reg_salary <- lm(formula = S ~ X + E + M, data = salary)
summary(reg_salary)
## 
## Call:
## lm(formula = S ~ X + E + M, data = salary)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1884.60  -653.60    22.23   844.85  1716.47 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8035.60     386.69  20.781  < 2e-16 ***
## X             546.18      30.52  17.896  < 2e-16 ***
## E2           3144.04     361.97   8.686 7.73e-11 ***
## E3           2996.21     411.75   7.277 6.72e-09 ***
## M1           6883.53     313.92  21.928  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1027 on 41 degrees of freedom
## Multiple R-squared:  0.9568, Adjusted R-squared:  0.9525 
## F-statistic: 226.8 on 4 and 41 DF,  p-value: < 2.2e-16
# Check the dummy-variable coding
contrasts(salary$E) # level 1 is the baseline: not yet the desired coding
##   2 3
## 1 0 0
## 2 1 0
## 3 0 1
# Make level "3" the baseline category of E.
salary$E <- relevel(salary$E, ref = "3")
contrasts(salary$E)
##   1 2
## 3 0 0
## 1 1 0
## 2 0 1
contrasts(salary$M) # already as desired
##   1
## 0 0
## 1 1
# Refit the regression with the new reference level
reg_salary <- lm(formula = S ~ X + E + M, data = salary)
coef(summary(reg_salary))
##               Estimate Std. Error   t value     Pr(>|t|)
## (Intercept) 11031.8079  383.21713 28.787356 8.086512e-29
## X             546.1840   30.51919 17.896411 5.546313e-21
## E1          -2996.2103  411.75271 -7.276723 6.722373e-09
## E2            147.8249  387.65932  0.381327 7.049297e-01
## M1           6883.5310  313.91898 21.927731 2.901444e-24