Hồi quy OLS với 7 sai phạm thường gặp
Kết nối dữ liệu
setwd("c:/vidu")
library(readxl)
dulieu <- read_excel("data15.10.xlsx")
head(dulieu)
## # A tibble: 6 x 17
## Year Time KFDI LnKFDI VFDI LnVFDI EXP LnEX IMP LnIM EXC LnEXC
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2000~ 1945 10.6 2.36 225 5.42 3111 8.04 3358 8.12 14053. 9.55
## 2 2000~ 1946 11.3 2.42 225 5.42 3495 8.16 3931 8.28 14075 9.55
## 3 2000~ 1947 11.6 2.45 250 5.52 3945 8.28 3911 8.27 14120. 9.56
## 4 2000~ 1948 38.3 3.65 598 6.39 3896 8.27 4438 8.40 14423 9.58
## 5 2001~ 1949 12.6 2.54 199 5.29 3628 8.20 3624 8.20 14548. 9.59
## 6 2001~ 1950 9.63 2.26 301 5.71 3973 8.29 4206 8.34 14643. 9.59
## # ... with 5 more variables: USGR <dbl>, LnGDP <dbl>, LnK <dbl>, LnL <dbl>,
## # LnEng <dbl>
congthuc <- LnGDP~LnIM
Đồ thị mối quan hệ giữa LnGDP với LnIM
library(ggplot2)
ggplot(data=dulieu)+
geom_point(aes(x=LnIM,y=LnGDP, color=factor(Year)), size=dulieu$LnGDP) +
geom_smooth(method = "lm", aes(x=LnIM,y=LnGDP), size=2, color="red", se = FALSE) +
theme(legend.position = "none")

Hồi quy tuyến tính bằng phương pháp ols
hoiquy <-lm(congthuc, data=dulieu)
summary(hoiquy)
##
## Call:
## lm(formula = congthuc, data = dulieu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.08132 -0.26085 -0.02018 0.28822 1.07475
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.22258 0.52122 -13.86 <2e-16 ***
## LnIM 1.26660 0.05352 23.67 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4259 on 74 degrees of freedom
## Multiple R-squared: 0.8833, Adjusted R-squared: 0.8817
## F-statistic: 560.1 on 1 and 74 DF, p-value: < 2.2e-16
kiểm tra đa cộng tuyến
library(car)
## Loading required package: carData
#vif(hoiquy)
Kiểm tra phương sai của phần dư
spreadLevelPlot(hoiquy)

##
## Suggested power transformation: 2.008078
ncvTest(hoiquy)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 7.967705, Df = 1, p = 0.0047619
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(hoiquy)
##
## studentized Breusch-Pagan test
##
## data: hoiquy
## BP = 7.4486, df = 1, p-value = 0.006349
Kiểm tra tự tương quan của phần dư
durbinWatsonTest(hoiquy)
## lag Autocorrelation D-W Statistic p-value
## 1 0.5433096 0.8762355 0
## Alternative hypothesis: rho != 0
dwtest(hoiquy)
##
## Durbin-Watson test
##
## data: hoiquy
## DW = 0.87624, p-value = 1.738e-08
## alternative hypothesis: true autocorrelation is greater than 0
Kiểm định Wald về ràng buột hệ số hồi quy
library(AER)
## Loading required package: sandwich
## Loading required package: survival
car::linearHypothesis(hoiquy,"LnIM=1.2667")
## Linear hypothesis test
##
## Hypothesis:
## LnIM = 1.2667
##
## Model 1: restricted model
## Model 2: LnGDP ~ LnIM
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 75 13.425
## 2 74 13.425 1 5.9352e-07 0 0.9986
Kiểm tra phần dư có phân phối chuẩn
phandu <-residuals(hoiquy)
hist(phandu)

library(fBasics)
## Loading required package: timeDate
## Loading required package: timeSeries
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
##
## time<-
##
## Attaching package: 'fBasics'
## The following object is masked from 'package:car':
##
## densityPlot
jarqueberaTest(phandu)
##
## Title:
## Jarque - Bera Normalality Test
##
## Test Results:
## STATISTIC:
## X-squared: 0.6092
## P VALUE:
## Asymptotic p Value: 0.7374
##
## Description:
## Thu Mar 26 12:00:45 2020 by user: Admin
shapiroTest(phandu)
##
## Title:
## Shapiro - Wilk Normality Test
##
## Test Results:
## STATISTIC:
## W: 0.9892
## P VALUE:
## 0.7726
##
## Description:
## Thu Mar 26 12:00:45 2020 by user: Admin
Kiểm tra thiếu biến bởi Ramsey’s RESET
resettest(hoiquy, power=2, type="regressor")
##
## RESET test
##
## data: hoiquy
## RESET = 15.234, df1 = 1, df2 = 73, p-value = 0.0002096
resettest(data=dulieu,congthuc, power=2, type="regressor")
##
## RESET test
##
## data: congthuc
## RESET = 15.234, df1 = 1, df2 = 73, p-value = 0.0002096
Ước lượng là tuyến tính
library(conf)
crPlots(hoiquy)

#ceresPlots(hoiquy)
Kiểm tra giá trị ngoại vi
outlierTest(hoiquy)
## No Studentized residuals with Bonferroni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferroni p
## 6 -2.701985 0.0085658 0.651