# Analisis Regresi Berganda (multiple regression analysis)
# Multikolinieritas terjadi jika nilai VIF > 10 (multicollinearity is indicated when VIF > 10)
# ============================ DATA: Number of Cases ============================
# Fits y ~ x1 + x2 on the "Sheet1" worksheet, plots the pairwise correlations,
# runs a partial F test for x2 and checks the variance inflation factors.
library(readxl)
antar <- read_excel("E:\\Praktikum-5.xlsx", sheet = "Sheet1")

# Matrix of every column except the first two (presumably the predictors
# x1 and x2 -- confirm against the worksheet layout). Not used again in this
# section.
X <- as.matrix(antar[, -(1:2)])

# Regression model
regresi1 <- lm(y ~ x1 + x2, data = antar) # full model

# Correlation matrix plot (first column excluded)
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
chart.Correlation(antar[, -1], histogram = FALSE, pch = 19)

# Partial F test: does x2 add explanatory power once x1 is in the model?
reduced_model1 <- lm(y ~ x1, data = antar) # model without x2
# The nested models are passed smaller-first so that the table reports the
# change in Df and Sum of Sq as positive increments; the F statistic and
# p-value are the same as with the reverse order.
anova(reduced_model1, regresi1)
## Analysis of Variance Table
##
## Model 1: y ~ x1
## Model 2: y ~ x1 + x2
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 23 402.13
## 2 22 233.73 1 168.4 15.851 0.0006312 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Multicollinearity check via variance inflation factors
library(car)
## Loading required package: carData
vif(regresi1)
## x1 x2
## 3.118474 3.118474
# ================================= DATA: EEO ==================================
# Fits ACHV ~ FAM + PEER + SCHOOL, runs a partial F test for PEER and SCHOOL
# jointly, and inspects multicollinearity among the three predictors.

# Load the data
eeo <- read_excel("E:\\praktikum 7 EEO.xlsx", sheet = "data")
eeo
## # A tibble: 70 x 5
## ROW ACHV FAM PEER SCHOOL
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 -0.431 0.608 0.0351 0.166
## 2 2 0.800 0.794 0.479 0.534
## 3 3 -0.925 -0.826 -0.620 -0.786
## 4 4 -2.19 -1.25 -1.22 -1.04
## 5 5 -2.85 0.174 -0.185 0.142
## 6 6 -0.662 0.202 0.128 0.273
## 7 7 2.64 0.242 -0.0902 0.0497
## 8 8 2.36 0.594 0.218 0.519
## 9 9 -0.913 -0.616 -0.490 -0.632
## 10 10 0.594 0.994 0.622 0.934
## # ... with 60 more rows

# Predictor matrix: drops ROW and ACHV, leaving FAM, PEER and SCHOOL
XX <- as.matrix(eeo[, -(1:2)])

# Correlation matrix plot (ROW excluded)
library(PerformanceAnalytics)
chart.Correlation(eeo[, -1], histogram = FALSE, pch = 19)

# Multiple regression model
regresi2 <- lm(ACHV ~ FAM + PEER + SCHOOL, data = eeo) # full model

# Partial F test: do PEER and SCHOOL jointly add anything beyond FAM?
# full_model is the same fit as regresi2, so it is aliased rather than refitted.
full_model <- regresi2
reduced_model2 <- lm(ACHV ~ FAM, data = eeo) # model without PEER and SCHOOL
# The nested models are passed smaller-first so that the table reports the
# change in Df and Sum of Sq as positive increments; the F statistic and
# p-value are the same as with the reverse order.
anova(reduced_model2, full_model)
## Analysis of Variance Table
##
## Model 1: ACHV ~ FAM
## Model 2: ACHV ~ FAM + PEER + SCHOOL
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 68 293.68
## 2 66 282.87 2 10.803 1.2603 0.2903

summary(regresi2) # summary of the full model
##
## Call:
## lm(formula = ACHV ~ FAM + PEER + SCHOOL, data = eeo)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.2096 -1.3934 -0.2947 1.1415 4.5881
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.06996 0.25064 -0.279 0.781
## FAM 1.10126 1.41056 0.781 0.438
## PEER 2.32206 1.48129 1.568 0.122
## SCHOOL -2.28100 2.22045 -1.027 0.308
##
## Residual standard error: 2.07 on 66 degrees of freedom
## Multiple R-squared: 0.2063, Adjusted R-squared: 0.1702
## F-statistic: 5.717 on 3 and 66 DF, p-value: 0.001535

# 95% quantile of the F distribution with 3 and 66 degrees of freedom
# (regression df = k, residual df = n - p). These match the overall
# F-statistic reported by summary() above; the partial F test above has
# 2 and 66 degrees of freedom instead.
Ftabel <- qf(0.95, df1 = 3, df2 = 66)
Ftabel
## [1] 2.743711

# Multicollinearity check via variance inflation factors
# (vif() comes from the car package, attached earlier in this script)
vif(regresi2)
## FAM PEER SCHOOL
## 37.58064 30.21166 83.15544

# Pairwise correlations among the predictors
cor(XX)
## FAM PEER SCHOOL
## FAM 1.0000000 0.9600806 0.9856837
## PEER 0.9600806 1.0000000 0.9821601
## SCHOOL 0.9856837 0.9821601 1.0000000

# Eigen decomposition of the raw cross-product X'X of the predictors.
# NOTE(review): XX is neither centred nor scaled here; collinearity
# diagnostics are often computed on the correlation matrix instead --
# confirm which is intended.
aa <- t(XX) %*% XX
eigen(aa)
## eigen() decomposition
## $values
## [1] 209.3295610 2.7303093 0.5833274
##
## $vectors
## [,1] [,2] [,3]
## [1,] 0.6174223 0.6698093 -0.4124865
## [2,] 0.5243779 -0.7413256 -0.4188844
## [3,] 0.5863595 -0.0423298 0.8089442
# ============================ DATA: Salary Survey =============================
# Regresses S on X plus the categorical variables E and M, then changes the
# reference level of E and refits to show the effect on the dummy coding.

# Load the data
salary <- read_excel("E:\\Praktikum 7 salary survey.xlsx", sheet = "Sheet1")

# Treat E and M as categorical so lm() expands them into dummy variables
salary[c("E", "M")] <- lapply(salary[c("E", "M")], as.factor)

reg_salary <- lm(S ~ X + E + M, data = salary)
summary(reg_salary)
##
## Call:
## lm(formula = S ~ X + E + M, data = salary)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1884.60 -653.60 22.23 844.85 1716.47
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8035.60 386.69 20.781 < 2e-16 ***
## X 546.18 30.52 17.896 < 2e-16 ***
## E2 3144.04 361.97 8.686 7.73e-11 ***
## E3 2996.21 411.75 7.277 6.72e-09 ***
## M1 6883.53 313.92 21.928 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1027 on 41 degrees of freedom
## Multiple R-squared: 0.9568, Adjusted R-squared: 0.9525
## F-statistic: 226.8 on 4 and 41 DF, p-value: < 2.2e-16

# Inspect the dummy coding of E: level 1 is the baseline, which is not the
# coding wanted here
contrasts(salary$E)
## 2 3
## 1 0 0
## 2 1 0
## 3 0 1

# Make level "3" the reference category and check the coding again
salary$E <- relevel(salary$E, ref = "3")
contrasts(salary$E)
## 1 2
## 3 0 0
## 1 1 0
## 2 0 1

# The coding of M (baseline 0) is already as wanted
contrasts(salary$M)
## 1
## 0 0
## 1 1

# Refit with the new reference level and show only the coefficient table
reg_salary <- lm(S ~ X + E + M, data = salary)
summary(reg_salary)$coefficients
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11031.8079 383.21713 28.787356 8.086512e-29
## X 546.1840 30.51919 17.896411 5.546313e-21
## E1 -2996.2103 411.75271 -7.276723 6.722373e-09
## E2 147.8249 387.65932 0.381327 7.049297e-01
## M1 6883.5310 313.91898 21.927731 2.901444e-24