Hồi quy OLS với 7 sai phạm thường gặp

Kết nối dữ liệu

setwd("c:/vidu")
library(readxl)
dulieu <- read_excel("data15.10.xlsx")
head(dulieu)
## # A tibble: 6 x 17
##   Year   Time  KFDI LnKFDI  VFDI LnVFDI   EXP  LnEX   IMP  LnIM    EXC LnEXC
##   <chr> <dbl> <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>
## 1 2000~  1945 10.6    2.36   225   5.42  3111  8.04  3358  8.12 14053.  9.55
## 2 2000~  1946 11.3    2.42   225   5.42  3495  8.16  3931  8.28 14075   9.55
## 3 2000~  1947 11.6    2.45   250   5.52  3945  8.28  3911  8.27 14120.  9.56
## 4 2000~  1948 38.3    3.65   598   6.39  3896  8.27  4438  8.40 14423   9.58
## 5 2001~  1949 12.6    2.54   199   5.29  3628  8.20  3624  8.20 14548.  9.59
## 6 2001~  1950  9.63   2.26   301   5.71  3973  8.29  4206  8.34 14643.  9.59
## # ... with 5 more variables: USGR <dbl>, LnGDP <dbl>, LnK <dbl>, LnL <dbl>,
## #   LnEng <dbl>
congthuc <- LnGDP~LnIM

Đồ thị mối quan hệ giữa LnGDP với LnIM

library(ggplot2)
ggplot(data=dulieu)+ 
  geom_point(aes(x=LnIM,y=LnGDP, color=factor(Year)), size=dulieu$LnGDP) +
  geom_smooth(method = "lm", aes(x=LnIM,y=LnGDP), size=2, color="red", se = FALSE) +
  theme(legend.position = "none")

Hồi quy tuyến tính bằng phương pháp ols

hoiquy <-lm(congthuc, data=dulieu)
summary(hoiquy)
## 
## Call:
## lm(formula = congthuc, data = dulieu)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.08132 -0.26085 -0.02018  0.28822  1.07475 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.22258    0.52122  -13.86   <2e-16 ***
## LnIM         1.26660    0.05352   23.67   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4259 on 74 degrees of freedom
## Multiple R-squared:  0.8833, Adjusted R-squared:  0.8817 
## F-statistic: 560.1 on 1 and 74 DF,  p-value: < 2.2e-16

kiểm tra đa cộng tuyến

library(car)
## Loading required package: carData
#vif(hoiquy)

Kiểm tra phương sai của phần dư

spreadLevelPlot(hoiquy)

## 
## Suggested power transformation:  2.008078
ncvTest(hoiquy)        
## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 7.967705, Df = 1, p = 0.0047619
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
bptest(hoiquy)
## 
##  studentized Breusch-Pagan test
## 
## data:  hoiquy
## BP = 7.4486, df = 1, p-value = 0.006349

Kiểm tra tự tương quan của phần dư

durbinWatsonTest(hoiquy)
##  lag Autocorrelation D-W Statistic p-value
##    1       0.5433096     0.8762355       0
##  Alternative hypothesis: rho != 0
dwtest(hoiquy)
## 
##  Durbin-Watson test
## 
## data:  hoiquy
## DW = 0.87624, p-value = 1.738e-08
## alternative hypothesis: true autocorrelation is greater than 0

Kiểm định Wald về ràng buột hệ số hồi quy

library(AER)
## Loading required package: sandwich
## Loading required package: survival
car::linearHypothesis(hoiquy,"LnIM=1.2667")
## Linear hypothesis test
## 
## Hypothesis:
## LnIM = 1.2667
## 
## Model 1: restricted model
## Model 2: LnGDP ~ LnIM
## 
##   Res.Df    RSS Df  Sum of Sq  F Pr(>F)
## 1     75 13.425                        
## 2     74 13.425  1 5.9352e-07  0 0.9986

Kiểm tra phần dư có phân phối chuẩn

phandu <-residuals(hoiquy)
hist(phandu)

library(fBasics)
## Loading required package: timeDate
## Loading required package: timeSeries
## 
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
## 
##     time<-
## 
## Attaching package: 'fBasics'
## The following object is masked from 'package:car':
## 
##     densityPlot
jarqueberaTest(phandu)
## 
## Title:
##  Jarque - Bera Normalality Test
## 
## Test Results:
##   STATISTIC:
##     X-squared: 0.6092
##   P VALUE:
##     Asymptotic p Value: 0.7374 
## 
## Description:
##  Thu Mar 26 12:00:45 2020 by user: Admin
shapiroTest(phandu)
## 
## Title:
##  Shapiro - Wilk Normality Test
## 
## Test Results:
##   STATISTIC:
##     W: 0.9892
##   P VALUE:
##     0.7726 
## 
## Description:
##  Thu Mar 26 12:00:45 2020 by user: Admin

Kiểm tra thiếu biến bởi Ramsey’s RESET

resettest(hoiquy, power=2, type="regressor")
## 
##  RESET test
## 
## data:  hoiquy
## RESET = 15.234, df1 = 1, df2 = 73, p-value = 0.0002096
resettest(data=dulieu,congthuc, power=2, type="regressor")
## 
##  RESET test
## 
## data:  congthuc
## RESET = 15.234, df1 = 1, df2 = 73, p-value = 0.0002096

Ước lượng là tuyến tính

library(conf)
crPlots(hoiquy)

#ceresPlots(hoiquy)

Kiểm tra giá trị ngoại vi

outlierTest(hoiquy)
## No Studentized residuals with Bonferroni p < 0.05
## Largest |rstudent|:
##    rstudent unadjusted p-value Bonferroni p
## 6 -2.701985          0.0085658        0.651