1. Kết nối dữ liệu

library(AER)
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
library(readxl)
library(fUnitRoots)
## Loading required package: timeDate
## Loading required package: timeSeries
## 
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
## 
##     time<-
## Loading required package: fBasics
## 
## Attaching package: 'fBasics'
## The following object is masked from 'package:car':
## 
##     densityPlot
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(MLmetrics)
## 
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:base':
## 
##     Recall
setwd("d:/DATA2020/arimaVSholtwiner")
dulieu <-read_excel("dubao.xlsx")
dulieu <-ts(dulieu,star=c(2010,10), frequency = 12)
head(dulieu)
##               Month        Quy Time Total Revenues Rev Rev2
## Oct 2010 1317427200 1317427200   88              0   0    0
## Nov 2010 1320105600 1320105600   79              0   0    0
## Dec 2010 1322697600 1322697600   18              0   0    0
## Jan 2011 1325376000 1325376000   36              0   0    0
## Feb 2011 1328054400 1328054400   27              0   0    0
## Mar 2011 1330560000 1330560000   61              0   0    0

2. Vẽ đồ thị thời gian

# Dữ liệu bậc gốc
plot.ts(dulieu[,5])

# Dữ liệu sai phân bậc 1
plot.ts(diff(dulieu[,5]))

3. Hồi quy ARIMA thủ công

# Tìm d, ta được d=1
# Kiểm định unit root test dữ liệu gốc
adfTest(dulieu[,5], lags=12, type=(c("nc")))
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 12
##   STATISTIC:
##     Dickey-Fuller: -0.6379
##   P VALUE:
##     0.4099 
## 
## Description:
##  Sun Aug 09 12:36:13 2020 by user: Admin
#Kiểm định unit root test dữ liệu sai phân bậc 1
adfTest(diff(dulieu[,5]), lags=12, type="nc")
## Warning in adfTest(diff(dulieu[, 5]), lags = 12, type = "nc"): p-value smaller
## than printed p-value
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 12
##   STATISTIC:
##     Dickey-Fuller: -4.1881
##   P VALUE:
##     0.01 
## 
## Description:
##  Sun Aug 09 12:36:13 2020 by user: Admin
# Tìm p, ta được p=4
pacf(diff(dulieu[,5]))

# tìm q, ta được q=1
acf(diff(dulieu[,5]))

# Hồi quy ARIMA
hoiquy1 <- Arima(dulieu[,5], order=c(4,1,1), seasonal=c(1,1,1))
hoiquy1
## Series: dulieu[, 5] 
## ARIMA(4,1,1)(1,1,1)[12] 
## 
## Coefficients:
##          ar1      ar2      ar3     ar4      ma1    sar1     sma1
##       0.0063  -0.3109  -0.0578  0.0540  -0.7425  0.1201  -0.9993
## s.e.  0.3125   0.2314   0.2337  0.2043   0.2906  0.1216   0.1539
## 
## sigma^2 estimated as 6.899e+09:  log likelihood=-1168.57
## AIC=2353.14   AICc=2354.89   BIC=2373.22
uocluong1 <- fitted(hoiquy1)
AIC(hoiquy1)
## [1] 2353.138
BIC(hoiquy1)
## [1] 2373.225
MSE(uocluong1,dulieu[,5])
## [1] 5572246515
MSE(uocluong1,hoiquy1$x)
## [1] 5572246515
RMSE(uocluong1,dulieu[,5])
## [1] 74647.48
MAE(uocluong1,dulieu[,5])
## [1] 52623.9
MAPE(uocluong1,dulieu[,5])
## [1] NaN

4. Hồi quy ARIMA tự động

hoiquy2 <- auto.arima(dulieu[,5], ic="bic", seasonal=T)
summary(hoiquy2)
## Series: dulieu[, 5] 
## ARIMA(0,1,1)(1,0,0)[12] 
## 
## Coefficients:
##           ma1    sar1
##       -0.8796  0.2483
## s.e.   0.0539  0.0928
## 
## sigma^2 estimated as 7.591e+09:  log likelihood=-1317.85
## AIC=2641.7   AICc=2641.94   BIC=2649.6
## 
## Training set error measures:
##                    ME     RMSE      MAE MPE MAPE      MASE       ACF1
## Training set 3842.122 85861.33 57673.92 NaN  Inf 0.7763007 0.09615834
uocluong2 <- fitted(hoiquy2)
MSE(uocluong2,hoiquy2$x)
## [1] 7372168722

5. Hồi quy Holt Winter

hoiquy3 <- hw(dulieu[,5], seasonal = c("additive"))
summary(hoiquy3)
## 
## Forecast method: Holt-Winters' additive method
## 
## Model Information:
## Holt-Winters' additive method 
## 
## Call:
##  hw(y = dulieu[, 5], seasonal = c("additive")) 
## 
##   Smoothing parameters:
##     alpha = 0.1572 
##     beta  = 0.003 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 1340.4399 
##     b = 3032.9358 
##     s = 19661.19 -29367 -12679.5 -22525.9 -25935.62 9676.476
##            13945.4 -40292.69 -62991.86 94276.35 87270.06 -31036.89
## 
##   sigma:  85646
## 
##      AIC     AICc      BIC 
## 2862.102 2869.219 2907.057 
## 
## Error measures:
##                     ME     RMSE      MAE MPE MAPE      MASE       ACF1
## Training set -8405.709 78782.87 59239.63 NaN  Inf 0.7973755 0.06419114
## 
## Forecasts:
##          Point Forecast      Lo 80    Hi 80      Lo 95    Hi 95
## Jun 2019      41690.998  -68068.77 151450.8 -126172.09 209554.1
## Jul 2019      51970.466  -59187.90 163128.8 -118031.59 221972.5
## Aug 2019      35715.461  -76875.58 148306.5 -136477.67 207908.6
## Sep 2019      85176.393  -28881.04 199233.8  -89259.40 259612.2
## Oct 2019      34913.733  -80643.45 150470.9 -141815.73 211643.2
## Nov 2019     153641.578   36551.64 270731.5  -25432.02 332715.2
## Dec 2019     161079.978   42424.69 279735.3  -20387.63 342547.6
## Jan 2020       4265.548 -115987.32 124518.4 -179645.34 188176.4
## Feb 2020      27390.880  -94491.40 149273.2 -159011.98 213793.7
## Mar 2020      82055.065  -41488.05 205598.2 -106887.83 270998.0
## Apr 2020      78215.761  -47019.23 203450.8 -113314.63 269746.2
## May 2020      43061.455  -83896.05 170019.0 -151103.29 237226.2
## Jun 2020      46884.185  -81827.86 175596.2 -149963.91 243732.3
## Jul 2020      57163.653  -73330.94 187658.2 -142410.60 256737.9
## Aug 2020      40908.649  -91397.92 173215.2 -161436.78 243254.1
## Sep 2020      90369.580  -43777.99 224517.2 -114791.43 295530.6
## Oct 2020      40106.921  -95910.29 176124.1 -167913.46 248127.3
## Nov 2020     158834.765   20919.67 296749.9  -52088.18 369757.7
## Dec 2020     166273.165   26432.32 306114.0  -47594.95 380141.3
## Jan 2021       9458.735 -132335.32 151252.8 -207396.56 226314.0
## Feb 2021      32584.068 -111190.30 176358.4 -187299.85 252468.0
## Mar 2021      87248.252  -58533.15 233029.7 -135705.16 310201.7
## Apr 2021      83408.949  -64405.84 231223.7 -142654.26 309472.2
## May 2021      48254.643 -101619.53 198128.8 -180958.12 277467.4
uocluong3 <- fitted(hoiquy3)
MSE(uocluong3,hoiquy3$x)
## [1] 6206739862

6. Dự báo

# Đồ thị ước lượng và dự báo theo ARIMA thủ công
plot(hoiquy1$x,col="red")
lines(fitted(hoiquy1),col="blue")

dubao1 <- forecast(hoiquy1, h=19)
dubao1
##          Point Forecast      Lo 80    Hi 80      Lo 95    Hi 95
## Jun 2019       77917.73 -34747.122 190582.6  -94388.29 250223.8
## Jul 2019       26232.26 -90278.371 142742.9 -151955.37 204419.9
## Aug 2019       31561.07 -85037.465 148159.6 -146761.00 209883.1
## Sep 2019      110121.18  -7223.791 227466.2  -69342.47 289584.8
## Oct 2019       56777.46 -65419.124 178974.0 -130106.09 243661.0
## Nov 2019      133119.44   7995.373 258243.5  -58241.31 324480.2
## Dec 2019      153138.95  26866.732 279411.2  -39977.74 346255.6
## Jan 2020       30107.54 -97626.608 157841.7 -165244.98 225460.1
## Feb 2020       43807.28 -86167.785 173782.3 -154972.43 242587.0
## Mar 2020       87181.87 -44889.482 219253.2 -114803.83 289167.6
## Apr 2020       73940.18 -59830.252 207710.6 -130644.04 278524.4
## May 2020       96988.51 -38480.913 232457.9 -110194.10 304171.1
## Jun 2020       70512.19 -73257.859 214282.2 -149365.13 290389.5
## Jul 2020       64015.03 -82660.949 210691.0 -160306.52 288336.6
## Aug 2020       52838.66 -95166.801 200844.1 -173516.16 279193.5
## Sep 2020      112841.20 -37260.745 262943.2 -116719.92 342402.3
## Oct 2020       63288.65 -89647.793 216225.1 -170607.46 297184.8
## Nov 2020      161535.99   6079.178 316992.8  -76214.69 399286.7
## Dec 2020      169870.65  12316.453 327424.8  -71087.70 410829.0
plot(dubao1)

# Đồ thị ước lượng và dự báo ARIMA tự động
plot(hoiquy2$x,col="red")
lines(fitted(hoiquy1),col="blue")

dubao2 <- forecast(hoiquy2, h=19)
dubao2
##          Point Forecast     Lo 80    Hi 80      Lo 95    Hi 95
## Jun 2019       59461.46 -52196.51 171119.4 -111304.66 230227.6
## Jul 2019       82470.70 -29993.20 194934.6  -89527.99 254469.4
## Aug 2019       68890.75 -44373.34 182154.8 -104331.73 242113.2
## Sep 2019       48136.09 -65922.58 162194.8 -126301.60 222573.8
## Oct 2019       48136.09 -66711.67 162983.9 -127508.40 223780.6
## Nov 2019       56201.87 -59429.59 171833.3 -120641.19 233044.9
## Dec 2019       80895.08 -35514.81 197305.0  -97138.48 258928.6
## Jan 2020       50219.12 -66964.02 167402.3 -128997.03 229435.3
## Feb 2020       48136.09 -69815.23 166087.4 -132254.89 228527.1
## Mar 2020       60354.36 -58360.18 179068.9 -121203.86 241912.6
## Apr 2020       48136.09 -71336.79 167609.0 -134581.91 230854.1
## May 2020       99812.57 -20413.87 220039.0  -84057.90 283683.0
## Jun 2020       62901.57 -64177.50 189980.6 -131449.10 257252.2
## Jul 2020       68615.22 -59566.53 196797.0 -127421.85 264652.3
## Aug 2020       65243.05 -64031.98 194518.1 -132466.05 262952.1
## Sep 2020       60089.25 -70269.89 190448.4 -139277.84 259456.3
## Oct 2020       60089.25 -71345.05 191523.6 -140922.17 261100.7
## Nov 2020       62092.15 -70408.60 194592.9 -140550.26 264734.5
## Dec 2020       68223.97 -65334.71 201782.6 -136036.40 272484.3
plot(dubao2)

# Đồ thị ước lượng và dự báo  Holt Winter
plot(hoiquy3$x,col="red")
lines(fitted(hoiquy3),col="blue")

dubao3 <- forecast(hoiquy3, h=19)
dubao3
##          Point Forecast      Lo 80    Hi 80      Lo 95    Hi 95
## Jun 2019      41690.998  -68068.77 151450.8 -126172.09 209554.1
## Jul 2019      51970.466  -59187.90 163128.8 -118031.59 221972.5
## Aug 2019      35715.461  -76875.58 148306.5 -136477.67 207908.6
## Sep 2019      85176.393  -28881.04 199233.8  -89259.40 259612.2
## Oct 2019      34913.733  -80643.45 150470.9 -141815.73 211643.2
## Nov 2019     153641.578   36551.64 270731.5  -25432.02 332715.2
## Dec 2019     161079.978   42424.69 279735.3  -20387.63 342547.6
## Jan 2020       4265.548 -115987.32 124518.4 -179645.34 188176.4
## Feb 2020      27390.880  -94491.40 149273.2 -159011.98 213793.7
## Mar 2020      82055.065  -41488.05 205598.2 -106887.83 270998.0
## Apr 2020      78215.761  -47019.23 203450.8 -113314.63 269746.2
## May 2020      43061.455  -83896.05 170019.0 -151103.29 237226.2
## Jun 2020      46884.185  -81827.86 175596.2 -149963.91 243732.3
## Jul 2020      57163.653  -73330.94 187658.2 -142410.60 256737.9
## Aug 2020      40908.649  -91397.92 173215.2 -161436.78 243254.1
## Sep 2020      90369.580  -43777.99 224517.2 -114791.43 295530.6
## Oct 2020      40106.921  -95910.29 176124.1 -167913.46 248127.3
## Nov 2020     158834.765   20919.67 296749.9  -52088.18 369757.7
## Dec 2020     166273.165   26432.32 306114.0  -47594.95 380141.3
plot(dubao3)

Kiểm định theo yêu cầu

#Ljung-Box test

checkresiduals(hoiquy1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,1,1)(1,1,1)[12]
## Q* = 13.287, df = 14, p-value = 0.504
## 
## Model df: 7.   Total lags used: 21
#Kiem dinh khong chech
uocluong <-fitted(hoiquy1)
thucte <-hoiquy1$x
phandu <-resid(hoiquy1)
dulieu2 <-data.frame(uocluong,thucte, phandu)
tinh1 <-lm(thucte~uocluong, dulieu2)
tinh2 <-lm(phandu ~ 1,data=dulieu2)


#Mincer - Zarnowitz test
linearHypothesis(tinh1, c("(Intercept) = 0", "uocluong = 1"))
## Linear hypothesis test
## 
## Hypothesis:
## (Intercept) = 0
## uocluong = 1
## 
## Model 1: restricted model
## Model 2: thucte ~ uocluong
## 
##   Res.Df        RSS Df  Sum of Sq      F Pr(>F)
## 1    104 5.7951e+11                            
## 2    102 5.5770e+11  2 2.1811e+10 1.9945 0.1413
#Holden- Peel test
linearHypothesis(tinh2, c("(Intercept) = 0"))
## Linear hypothesis test
## 
## Hypothesis:
## (Intercept) = 0
## 
## Model 1: restricted model
## Model 2: phandu ~ 1
## 
##   Res.Df        RSS Df  Sum of Sq      F Pr(>F)
## 1    104 5.7951e+11                            
## 2    103 5.7651e+11  1 3007820808 0.5374 0.4652