Kết nối dữ liệu
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
setwd("c:/Users/Admin/Desktop/DuAnR/ARIMA")
dulieu <-read_excel("vnindex.xlsx")
head(dulieu)
## # A tibble: 6 x 3
## VNINDEX DATE DVN
## <dbl> <dttm> <dbl>
## 1 878. 2019-01-03 00:00:00 -0.0152
## 2 881. 2019-01-04 00:00:00 0.00305
## 3 890. 2019-01-07 00:00:00 0.00992
## 4 887. 2019-01-08 00:00:00 -0.00247
## 5 897. 2019-01-09 00:00:00 0.0108
## 6 898. 2019-01-10 00:00:00 0.00146
dulieu<- dulieu%>%select(DVN)
head(dulieu)
## # A tibble: 6 x 1
## DVN
## <dbl>
## 1 -0.0152
## 2 0.00305
## 3 0.00992
## 4 -0.00247
## 5 0.0108
## 6 0.00146
Chọn p d q tự động
library(forecast)
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.fracdiff fracdiff
## residuals.fracdiff fracdiff
auto.arima(dulieu$DVN)
## Series: dulieu$DVN
## ARIMA(4,0,3) with zero mean
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ar2 ar3 ar4 ma1 ma2 ma3
## -0.9159 -0.0062 0.4862 -0.1057 1.1745 0.2718 -0.3855
## s.e. NaN NaN NaN NaN NaN NaN NaN
##
## sigma^2 estimated as 0.0001132: log likelihood=1033.89
## AIC=-2051.78 AICc=-2051.34 BIC=-2021.39
Chọ p q phù hợp
kqua=data.frame(Model=0,AIC=0)
n=0
for (i in 0:4){
for (j in 0:3){
n=n+1
temp=Arima(dulieu$DVN,c(i,0,j),seasonal = c(0,1,0))
kqua[n,]=c(i+j/10,temp$aic)
}
}
plot(kqua[c(1:60),],col=c(1,2,3,4),pch=20)

Chọn mô hình phù hợp bởi AIC
kqua
## Model AIC
## 1 0.0 -2030.421
## 2 0.1 -2042.324
## 3 0.2 -2041.115
## 4 0.3 -2039.614
## 5 1.0 -2043.169
## 6 1.1 -2041.222
## 7 1.2 -2039.234
## 8 1.3 -2039.665
## 9 2.0 -2041.223
## 10 2.1 -2040.581
## 11 2.2 -2061.786
## 12 2.3 -2062.514
## 13 3.0 -2039.223
## 14 3.1 -2038.603
## 15 3.2 -2054.045
## 16 3.3 -2062.161
## 17 4.0 -2038.113
## 18 4.1 -2039.398
## 19 4.2 -2052.046
## 20 4.3 -2050.968
kqua[which.min(kqua$AIC),]
## Model AIC
## 12 2.3 -2062.514
Hồi quy ARIMA
pacf(dulieu$DVN)

acf(dulieu$DVN)

arima(dulieu$DVN,order=c(2,0,3))
##
## Call:
## arima(x = dulieu$DVN, order = c(2, 0, 3))
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 intercept
## 0.4787 -0.8972 -0.2889 0.9594 0.1034 -4e-04
## s.e. 0.0343 0.0281 0.0666 0.0363 0.0631 7e-04
##
## sigma^2 estimated as 0.0001064: log likelihood = 1038.26, aic = -2062.51
Chuẩn bị dữ liệu Neuron
# Đếm số hàng
nrow(dulieu)
## [1] 330
# Chuẩn hoá Min-Max
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
dulieu <-dulieu%>%mutate(DVN2 = (DVN-min(DVN))/(max(DVN)-min(DVN)))
dulieu <-dulieu%>% mutate(AR1 = lag(DVN2,1), AR2=lag(DVN2,2))
dulieu <- dulieu %>% mutate(MA1 = rollmean(DVN2,k=1,fill = NA), MA2=rollmean(DVN2,k=2,fill = NA), MA3=rollmean(DVN2,k=3,fill = NA))
head(dulieu)
## # A tibble: 6 x 7
## DVN DVN2 AR1 AR2 MA1 MA2 MA3
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -0.0152 0.423 NA NA 0.423 0.504 NA
## 2 0.00305 0.585 0.423 NA 0.585 0.615 0.551
## 3 0.00992 0.646 0.585 0.423 0.646 0.591 0.589
## 4 -0.00247 0.536 0.646 0.585 0.536 0.594 0.612
## 5 0.0108 0.653 0.536 0.646 0.653 0.612 0.586
## 6 0.00146 0.571 0.653 0.536 0.571 0.586 0.608
tail(dulieu)
## # A tibble: 6 x 7
## DVN DVN2 AR1 AR2 MA1 MA2 MA3
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.00355 0.589 0.615 0.582 0.589 0.540 0.565
## 2 -0.00758 0.490 0.589 0.615 0.490 0.503 0.532
## 3 -0.00462 0.517 0.490 0.589 0.517 0.548 0.529
## 4 0.00248 0.580 0.517 0.490 0.580 0.530 0.526
## 5 -0.00863 0.481 0.580 0.517 0.481 0.529 0.546
## 6 0.00222 0.577 0.481 0.580 0.577 NA NA
congthuc <- DVN2~ AR1 + AR2 + MA1 + MA2 + MA3
hoiquy <-lm(congthuc,data=dulieu)
summary(hoiquy)
## Warning in summary.lm(hoiquy): essentially perfect fit: summary may be
## unreliable
##
## Call:
## lm(formula = congthuc, data = dulieu)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.569e-16 3.200e-18 7.710e-18 1.088e-17 3.838e-16
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.177e-17 2.101e-17 2.464e+00 0.0143 *
## AR1 -9.336e-17 2.310e-17 -4.042e+00 6.64e-05 ***
## AR2 0.000e+00 2.255e-17 0.000e+00 1.0000
## MA1 1.000e+00 3.544e-17 2.822e+16 < 2e-16 ***
## MA2 -5.088e-17 4.524e-17 -1.125e+00 0.2616
## MA3 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.942e-17 on 322 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 5.127e+32 on 4 and 322 DF, p-value: < 2.2e-16
dubao <-arima(dulieu$DVN2,order=c(2,0,3))
dubao
##
## Call:
## arima(x = dulieu$DVN2, order = c(2, 0, 3))
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 intercept
## 0.4787 -0.8972 -0.2890 0.9595 0.1033 0.5537
## s.e. 0.0343 0.0280 0.0666 0.0363 0.0631 0.0063
##
## sigma^2 estimated as 0.008395: log likelihood = 317.46, aic = -620.92
Phân tích neuron
dulieu <-na.omit(dulieu)
nrow(dulieu)
## [1] 327
dulieuTrain <-dulieu[1:290,]
dulieuTest <- dulieu[291:327,]
library(neuralnet)
##
## Attaching package: 'neuralnet'
## The following object is masked from 'package:dplyr':
##
## compute
nn <-neuralnet(congthuc, data=dulieuTrain, hidden=5, linear.output=FALSE)
nn$result.matrix
## [,1]
## error 0.013641928
## reached.threshold 0.008713881
## steps 316.000000000
## Intercept.to.1layhid1 -1.082910945
## AR1.to.1layhid1 -0.234402367
## AR2.to.1layhid1 -1.239895756
## MA1.to.1layhid1 5.029216699
## MA2.to.1layhid1 -0.629387261
## MA3.to.1layhid1 -0.387793766
## Intercept.to.1layhid2 1.002299029
## AR1.to.1layhid2 0.559408543
## AR2.to.1layhid2 -1.138643564
## MA1.to.1layhid2 -2.837035793
## MA2.to.1layhid2 0.640944766
## MA3.to.1layhid2 -0.796189319
## Intercept.to.1layhid3 13.701720573
## AR1.to.1layhid3 13.171240068
## AR2.to.1layhid3 13.386710543
## MA1.to.1layhid3 9.239833241
## MA2.to.1layhid3 10.617656866
## MA3.to.1layhid3 13.311422184
## Intercept.to.1layhid4 1.223542595
## AR1.to.1layhid4 -0.671480369
## AR2.to.1layhid4 0.138962047
## MA1.to.1layhid4 -2.698074159
## MA2.to.1layhid4 -2.017496245
## MA3.to.1layhid4 -0.246615248
## Intercept.to.1layhid5 -0.142837296
## AR1.to.1layhid5 0.254820869
## AR2.to.1layhid5 1.256402948
## MA1.to.1layhid5 -2.893619942
## MA2.to.1layhid5 -1.110927880
## MA3.to.1layhid5 -1.312825365
## Intercept.to.DVN2 0.941673824
## 1layhid1.to.DVN2 0.906687931
## 1layhid2.to.DVN2 -2.745465240
## 1layhid3.to.DVN2 -0.084255853
## 1layhid4.to.DVN2 -0.081908840
## 1layhid5.to.DVN2 -4.031677811
plot(nn, rep = "best")

nnketqua <- compute(nn, dulieuTest)
ketqua <-data.frame(thucte =dulieuTest$DVN2, uocluong =nnketqua$net.result)
ketqua
## thucte uocluong
## 1 0.57895345 0.58531176
## 2 0.28019350 0.23993779
## 3 0.09660974 0.09556336
## 4 0.47131231 0.48769087
## 5 0.39524692 0.39640568
## 6 0.53287267 0.54804131
## 7 0.57997577 0.57263032
## 8 0.29950021 0.25194791
## 9 0.35920811 0.27728316
## 10 0.01758944 0.07084164
## 11 0.45922573 0.51247263
## 12 0.97589064 0.76863983
## 13 0.60854794 0.59062496
## 14 0.58125548 0.51057890
## 15 0.12619105 0.11760272
## 16 0.56120287 0.59468281
## 17 0.79491920 0.71071411
## 18 0.83928584 0.74229341
## 19 1.00000000 0.79147813
## 20 0.67743875 0.64052655
## 21 0.57340480 0.55818798
## 22 0.70378012 0.67577713
## 23 0.52965581 0.53347105
## 24 0.64959093 0.63982584
## 25 0.57637438 0.58134951
## 26 0.67114520 0.65621245
## 27 0.59735827 0.59801939
## 28 0.65885681 0.64694853
## 29 0.61799909 0.59516395
## 30 0.24322699 0.21038859
## 31 0.58167773 0.59832447
## 32 0.61523364 0.60766270
## 33 0.58914862 0.58533254
## 34 0.49020822 0.48611200
## 35 0.51654874 0.52450037
## 36 0.57958182 0.58171854
## 37 0.48088376 0.48480822
plot(ketqua)

Tính độ chính xác
# Chuyển dữ liệu về ban đầu
#predict_testNN = (predict_testNN$net.result * (max(data$rating) - min(data$rating))) + min(data$rating)
#uluong=ketqua$uocluong * abs(diff(range(ketqua$thucte))) + min(ketqua$thucte)
#thte=ketqua$thucte * abs(diff(range(ketqua$thucte))) + min(ketqua$thucte)
uluong = ketqua$uocluong * (max(dulieu$DVN) - min(dulieu$DVN)) - min(dulieu$DVN)
thte = ketqua$thucte * (max(dulieu$DVN) - min(dulieu$DVN)) - min(dulieu$DVN)
dolech=((thte-uluong)/uluong)
sosanhdulieu=data.frame(uluong,thte,dolech)
head(sosanhdulieu)
## uluong thte dolech
## 1 0.12864873 0.12793301 -0.005563345
## 2 0.08977201 0.09430336 0.050476191
## 3 0.07352063 0.07363841 0.001602056
## 4 0.11766012 0.11581648 -0.015669185
## 5 0.10738468 0.10725424 -0.001214648
## 6 0.12445341 0.12274597 -0.013719547
chinhxac=1-abs(mean(sosanhdulieu$dolech))
chinhxac
## [1] 0.9810792
RMSE.NN = (sum((sosanhdulieu$thte - sosanhdulieu$uluong)^2) / nrow(sosanhdulieu)) ^ 0.5
RMSE.NN
## [1] 0.006703716
Dự báo
plot(forecast(dubao,h=30))
points(1:length(dulieu$DVN),fitted(dubao),type="l",col="green")

accuracy(dubao)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0003441617 0.09172036 0.06086056 -Inf Inf 0.7078668
## ACF1
## Training set -0.0005944457
fit <- ets(dulieu$DVN)
plot(forecast(fit,h=30))
points(1:length(dulieu$DVN),fitted(fit),type="l",col="red")

accuracy(fit)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0006064157 0.01111057 0.006928066 98.5316 135.9322 0.7158591
## ACF1
## Training set 0.198281
#ANN
#Feed-forward neural networks with a single hidden layer and lagged inputs for forecasting univariate time series
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
fitANN <- nnetar(dulieu$DVN)
plot(forecast(fitANN,h=30))
points(1:length(dulieu$DVN),fitted(fitANN),type="l",col="green")

accuracy(fitANN)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.701532e-05 0.002854885 0.002127617 50.67395 92.71577 0.2198412
## ACF1
## Training set -0.02484404
## FFNN
plot(forecast(uluong, h=30))

plot(forecast(thte, h=30))
