Kết nối dữ liệu

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
setwd("c:/Users/Admin/Desktop/DuAnR/ARIMA")
dulieu <-read_excel("vnindex.xlsx")
head(dulieu)
## # A tibble: 6 x 3
##   VNINDEX DATE                     DVN
##     <dbl> <dttm>                 <dbl>
## 1    878. 2019-01-03 00:00:00 -0.0152 
## 2    881. 2019-01-04 00:00:00  0.00305
## 3    890. 2019-01-07 00:00:00  0.00992
## 4    887. 2019-01-08 00:00:00 -0.00247
## 5    897. 2019-01-09 00:00:00  0.0108 
## 6    898. 2019-01-10 00:00:00  0.00146
dulieu<- dulieu%>%select(DVN)
head(dulieu)
## # A tibble: 6 x 1
##        DVN
##      <dbl>
## 1 -0.0152 
## 2  0.00305
## 3  0.00992
## 4 -0.00247
## 5  0.0108 
## 6  0.00146

Chọn p d q tự động

library(forecast)
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
##   method             from    
##   fitted.fracdiff    fracdiff
##   residuals.fracdiff fracdiff
auto.arima(dulieu$DVN)
## Series: dulieu$DVN 
## ARIMA(4,0,3) with zero mean 
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##           ar1      ar2     ar3      ar4     ma1     ma2      ma3
##       -0.9159  -0.0062  0.4862  -0.1057  1.1745  0.2718  -0.3855
## s.e.      NaN      NaN     NaN      NaN     NaN     NaN      NaN
## 
## sigma^2 estimated as 0.0001132:  log likelihood=1033.89
## AIC=-2051.78   AICc=-2051.34   BIC=-2021.39

Chọ p q phù hợp

kqua=data.frame(Model=0,AIC=0)
n=0
for (i in 0:4){
  for (j in 0:3){
    n=n+1
    temp=Arima(dulieu$DVN,c(i,0,j),seasonal = c(0,1,0))
    kqua[n,]=c(i+j/10,temp$aic)
    
  }
}

plot(kqua[c(1:60),],col=c(1,2,3,4),pch=20)

Chọn mô hình phù hợp bởi AIC

kqua
##    Model       AIC
## 1    0.0 -2030.421
## 2    0.1 -2042.324
## 3    0.2 -2041.115
## 4    0.3 -2039.614
## 5    1.0 -2043.169
## 6    1.1 -2041.222
## 7    1.2 -2039.234
## 8    1.3 -2039.665
## 9    2.0 -2041.223
## 10   2.1 -2040.581
## 11   2.2 -2061.786
## 12   2.3 -2062.514
## 13   3.0 -2039.223
## 14   3.1 -2038.603
## 15   3.2 -2054.045
## 16   3.3 -2062.161
## 17   4.0 -2038.113
## 18   4.1 -2039.398
## 19   4.2 -2052.046
## 20   4.3 -2050.968
kqua[which.min(kqua$AIC),]
##    Model       AIC
## 12   2.3 -2062.514

Hồi quy ARIMA

pacf(dulieu$DVN)

acf(dulieu$DVN)

arima(dulieu$DVN,order=c(2,0,3))
## 
## Call:
## arima(x = dulieu$DVN, order = c(2, 0, 3))
## 
## Coefficients:
##          ar1      ar2      ma1     ma2     ma3  intercept
##       0.4787  -0.8972  -0.2889  0.9594  0.1034     -4e-04
## s.e.  0.0343   0.0281   0.0666  0.0363  0.0631      7e-04
## 
## sigma^2 estimated as 0.0001064:  log likelihood = 1038.26,  aic = -2062.51

Chuẩn bị dữ liệu Neuron

# Đếm số hàng
nrow(dulieu)
## [1] 330
# Chuẩn  hoá Min-Max
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
dulieu <-dulieu%>%mutate(DVN2 = (DVN-min(DVN))/(max(DVN)-min(DVN)))
dulieu <-dulieu%>% mutate(AR1 = lag(DVN2,1), AR2=lag(DVN2,2))
dulieu <- dulieu %>% mutate(MA1 = rollmean(DVN2,k=1,fill = NA), MA2=rollmean(DVN2,k=2,fill = NA), MA3=rollmean(DVN2,k=3,fill = NA))

head(dulieu)
## # A tibble: 6 x 7
##        DVN  DVN2    AR1    AR2   MA1   MA2    MA3
##      <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>  <dbl>
## 1 -0.0152  0.423 NA     NA     0.423 0.504 NA    
## 2  0.00305 0.585  0.423 NA     0.585 0.615  0.551
## 3  0.00992 0.646  0.585  0.423 0.646 0.591  0.589
## 4 -0.00247 0.536  0.646  0.585 0.536 0.594  0.612
## 5  0.0108  0.653  0.536  0.646 0.653 0.612  0.586
## 6  0.00146 0.571  0.653  0.536 0.571 0.586  0.608
tail(dulieu)
## # A tibble: 6 x 7
##        DVN  DVN2   AR1   AR2   MA1    MA2    MA3
##      <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>
## 1  0.00355 0.589 0.615 0.582 0.589  0.540  0.565
## 2 -0.00758 0.490 0.589 0.615 0.490  0.503  0.532
## 3 -0.00462 0.517 0.490 0.589 0.517  0.548  0.529
## 4  0.00248 0.580 0.517 0.490 0.580  0.530  0.526
## 5 -0.00863 0.481 0.580 0.517 0.481  0.529  0.546
## 6  0.00222 0.577 0.481 0.580 0.577 NA     NA
congthuc <- DVN2~ AR1 + AR2 + MA1 + MA2 + MA3 
hoiquy <-lm(congthuc,data=dulieu)
summary(hoiquy)
## Warning in summary.lm(hoiquy): essentially perfect fit: summary may be
## unreliable
## 
## Call:
## lm(formula = congthuc, data = dulieu)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.569e-16  3.200e-18  7.710e-18  1.088e-17  3.838e-16 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept)  5.177e-17  2.101e-17  2.464e+00   0.0143 *  
## AR1         -9.336e-17  2.310e-17 -4.042e+00 6.64e-05 ***
## AR2          0.000e+00  2.255e-17  0.000e+00   1.0000    
## MA1          1.000e+00  3.544e-17  2.822e+16  < 2e-16 ***
## MA2         -5.088e-17  4.524e-17 -1.125e+00   0.2616    
## MA3                 NA         NA         NA       NA    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.942e-17 on 322 degrees of freedom
##   (3 observations deleted due to missingness)
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 5.127e+32 on 4 and 322 DF,  p-value: < 2.2e-16
dubao <-arima(dulieu$DVN2,order=c(2,0,3))
dubao
## 
## Call:
## arima(x = dulieu$DVN2, order = c(2, 0, 3))
## 
## Coefficients:
##          ar1      ar2      ma1     ma2     ma3  intercept
##       0.4787  -0.8972  -0.2890  0.9595  0.1033     0.5537
## s.e.  0.0343   0.0280   0.0666  0.0363  0.0631     0.0063
## 
## sigma^2 estimated as 0.008395:  log likelihood = 317.46,  aic = -620.92

Phân tích neuron

dulieu <-na.omit(dulieu)
nrow(dulieu)
## [1] 327
dulieuTrain <-dulieu[1:290,]
dulieuTest <- dulieu[291:327,]

library(neuralnet)
## 
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
## 
##     compute
nn <-neuralnet(congthuc, data=dulieuTrain, hidden=5, linear.output=FALSE)
nn$result.matrix
##                                [,1]
## error                   0.013641928
## reached.threshold       0.008713881
## steps                 316.000000000
## Intercept.to.1layhid1  -1.082910945
## AR1.to.1layhid1        -0.234402367
## AR2.to.1layhid1        -1.239895756
## MA1.to.1layhid1         5.029216699
## MA2.to.1layhid1        -0.629387261
## MA3.to.1layhid1        -0.387793766
## Intercept.to.1layhid2   1.002299029
## AR1.to.1layhid2         0.559408543
## AR2.to.1layhid2        -1.138643564
## MA1.to.1layhid2        -2.837035793
## MA2.to.1layhid2         0.640944766
## MA3.to.1layhid2        -0.796189319
## Intercept.to.1layhid3  13.701720573
## AR1.to.1layhid3        13.171240068
## AR2.to.1layhid3        13.386710543
## MA1.to.1layhid3         9.239833241
## MA2.to.1layhid3        10.617656866
## MA3.to.1layhid3        13.311422184
## Intercept.to.1layhid4   1.223542595
## AR1.to.1layhid4        -0.671480369
## AR2.to.1layhid4         0.138962047
## MA1.to.1layhid4        -2.698074159
## MA2.to.1layhid4        -2.017496245
## MA3.to.1layhid4        -0.246615248
## Intercept.to.1layhid5  -0.142837296
## AR1.to.1layhid5         0.254820869
## AR2.to.1layhid5         1.256402948
## MA1.to.1layhid5        -2.893619942
## MA2.to.1layhid5        -1.110927880
## MA3.to.1layhid5        -1.312825365
## Intercept.to.DVN2       0.941673824
## 1layhid1.to.DVN2        0.906687931
## 1layhid2.to.DVN2       -2.745465240
## 1layhid3.to.DVN2       -0.084255853
## 1layhid4.to.DVN2       -0.081908840
## 1layhid5.to.DVN2       -4.031677811
plot(nn, rep = "best")

nnketqua <- compute(nn, dulieuTest)
ketqua <-data.frame(thucte =dulieuTest$DVN2, uocluong =nnketqua$net.result)
ketqua
##        thucte   uocluong
## 1  0.57895345 0.58531176
## 2  0.28019350 0.23993779
## 3  0.09660974 0.09556336
## 4  0.47131231 0.48769087
## 5  0.39524692 0.39640568
## 6  0.53287267 0.54804131
## 7  0.57997577 0.57263032
## 8  0.29950021 0.25194791
## 9  0.35920811 0.27728316
## 10 0.01758944 0.07084164
## 11 0.45922573 0.51247263
## 12 0.97589064 0.76863983
## 13 0.60854794 0.59062496
## 14 0.58125548 0.51057890
## 15 0.12619105 0.11760272
## 16 0.56120287 0.59468281
## 17 0.79491920 0.71071411
## 18 0.83928584 0.74229341
## 19 1.00000000 0.79147813
## 20 0.67743875 0.64052655
## 21 0.57340480 0.55818798
## 22 0.70378012 0.67577713
## 23 0.52965581 0.53347105
## 24 0.64959093 0.63982584
## 25 0.57637438 0.58134951
## 26 0.67114520 0.65621245
## 27 0.59735827 0.59801939
## 28 0.65885681 0.64694853
## 29 0.61799909 0.59516395
## 30 0.24322699 0.21038859
## 31 0.58167773 0.59832447
## 32 0.61523364 0.60766270
## 33 0.58914862 0.58533254
## 34 0.49020822 0.48611200
## 35 0.51654874 0.52450037
## 36 0.57958182 0.58171854
## 37 0.48088376 0.48480822
plot(ketqua)

Tính độ chính xác

# Chuyển dữ liệu về ban đầu
#predict_testNN = (predict_testNN$net.result * (max(data$rating) - min(data$rating))) + min(data$rating)

#uluong=ketqua$uocluong * abs(diff(range(ketqua$thucte))) + min(ketqua$thucte)
#thte=ketqua$thucte * abs(diff(range(ketqua$thucte))) + min(ketqua$thucte)
uluong = ketqua$uocluong * (max(dulieu$DVN) - min(dulieu$DVN)) - min(dulieu$DVN)
thte = ketqua$thucte * (max(dulieu$DVN) - min(dulieu$DVN)) - min(dulieu$DVN)
dolech=((thte-uluong)/uluong)
sosanhdulieu=data.frame(uluong,thte,dolech)
head(sosanhdulieu)
##       uluong       thte       dolech
## 1 0.12864873 0.12793301 -0.005563345
## 2 0.08977201 0.09430336  0.050476191
## 3 0.07352063 0.07363841  0.001602056
## 4 0.11766012 0.11581648 -0.015669185
## 5 0.10738468 0.10725424 -0.001214648
## 6 0.12445341 0.12274597 -0.013719547
chinhxac=1-abs(mean(sosanhdulieu$dolech))
chinhxac
## [1] 0.9810792
RMSE.NN = (sum((sosanhdulieu$thte - sosanhdulieu$uluong)^2) / nrow(sosanhdulieu)) ^ 0.5
RMSE.NN
## [1] 0.006703716

Dự báo

plot(forecast(dubao,h=30))
points(1:length(dulieu$DVN),fitted(dubao),type="l",col="green")

accuracy(dubao)
##                        ME       RMSE        MAE  MPE MAPE      MASE
## Training set 0.0003441617 0.09172036 0.06086056 -Inf  Inf 0.7078668
##                       ACF1
## Training set -0.0005944457
fit <- ets(dulieu$DVN)
plot(forecast(fit,h=30))
points(1:length(dulieu$DVN),fitted(fit),type="l",col="red")

accuracy(fit)
##                         ME       RMSE         MAE     MPE     MAPE      MASE
## Training set -0.0006064157 0.01111057 0.006928066 98.5316 135.9322 0.7158591
##                  ACF1
## Training set 0.198281
#ANN
#Feed-forward neural networks with a single hidden layer and lagged inputs for forecasting univariate time series
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
 fitANN <- nnetar(dulieu$DVN)
 plot(forecast(fitANN,h=30))
 points(1:length(dulieu$DVN),fitted(fitANN),type="l",col="green")

 accuracy(fitANN)
##                        ME        RMSE         MAE      MPE     MAPE      MASE
## Training set 2.701532e-05 0.002854885 0.002127617 50.67395 92.71577 0.2198412
##                     ACF1
## Training set -0.02484404
 ## FFNN
 plot(forecast(uluong, h=30))

 plot(forecast(thte, h=30))