Problem Definition
In the xts data there are 7 columns: the timestamp (time15) and 6 measurement columns.
The aim is to use two of the columns (RUB_sol & MFA_sol)
and predict 30 data points for each of these two columns.
Setup
Load Libs
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tseries)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(forecast)
library(quantmod)
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
Read
# Read the raw sensor readings from CSV. The first row holds column names,
# and text columns are kept as character (not factor) so the timestamp
# column can be parsed explicitly afterwards.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
dfrdata <- read.csv(
  "C:/firstproject/xtsdata (3).csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Preview the first rows to confirm the columns were read as expected.
head(dfrdata)
## time15 RUB_sol MFA_sol NFA_sol NFY_sol SFY_baro_air NFA_baro_air
## 1 4/30/2012 0:15 11.929 12.689 11.26 8.19 9.43 13.134
## 2 4/30/2012 0:30 11.879 12.627 11.28 8.18 9.25 12.925
## 3 4/30/2012 0:45 11.828 12.570 11.26 8.21 9.03 12.736
## 4 4/30/2012 1:00 11.779 12.511 11.23 8.22 8.82 12.605
## 5 4/30/2012 1:15 11.730 12.459 11.17 8.23 8.60 12.455
## 6 4/30/2012 1:30 11.682 12.397 11.15 8.24 8.42 12.335
cat("\nClass:\n")
##
## Class:
class(dfrdata)
## [1] "data.frame"
#nrow(dfrdata)
xtsData
# Parse the "month/day/year hour:minute" strings into date-times.
# POSIXct (a plain numeric vector) is used instead of POSIXlt (a list of
# components) because it is the appropriate date-time class for a data frame
# column. No `tz` is supplied, so the session's local time zone applies.
dfrdata$time15 <- as.POSIXct(dfrdata$time15, format = "%m/%d/%Y %H:%M")
class(dfrdata)
## [1] "data.frame"
# Build a one-column xts series of the RUB_sol readings, indexed by the
# parsed timestamps, and label that column.
xtsD1 <- xts(x = dfrdata$RUB_sol, order.by = dfrdata$time15)
colnames(xtsD1) <- "RUB_sol"
# Confirm the result is an xts/zoo object.
class(xtsD1)
## [1] "xts" "zoo"
head(xtsD1)
## RUB_sol
## 2012-04-30 00:15:00 11.929
## 2012-04-30 00:30:00 11.879
## 2012-04-30 00:45:00 11.828
## 2012-04-30 01:00:00 11.779
## 2012-04-30 01:15:00 11.730
## 2012-04-30 01:30:00 11.682
# Build a one-column xts series of the MFA_sol readings, indexed by the
# parsed timestamps, and label that column.
xtsD2 <- xts(x = dfrdata$MFA_sol, order.by = dfrdata$time15)
colnames(xtsD2) <- "MFA_sol"
# Confirm the result is an xts/zoo object.
class(xtsD2)
## [1] "xts" "zoo"
head(xtsD2)
## MFA_sol
## 2012-04-30 00:15:00 12.689
## 2012-04-30 00:30:00 12.627
## 2012-04-30 00:45:00 12.570
## 2012-04-30 01:00:00 12.511
## 2012-04-30 01:15:00 12.459
## 2012-04-30 01:30:00 12.397
Xts Info
cat("\n")
cat("\nSummary:\n")
##
## Summary:
summary(xtsD1)
## Index RUB_sol
## Min. :2012-04-30 00:15:00 Min. : 9.489
## 1st Qu.:2012-05-15 21:11:15 1st Qu.:13.906
## Median :2012-05-31 18:07:30 Median :16.043
## Mean :2012-05-31 18:07:30 Mean :15.956
## 3rd Qu.:2012-06-16 15:03:45 3rd Qu.:17.971
## Max. :2012-07-02 12:00:00 Max. :22.439
cat("\nStart:\n")
##
## Start:
start(xtsD1)
## [1] "2012-04-30 00:15:00 IST"
cat("\nEnds:\n")
##
## Ends:
end(xtsD1)
## [1] "2012-07-02 12:00:00 IST"
cat("\nFreq:\n")
##
## Freq:
frequency(xtsD1)
## [1] 0.001111111
cat("\nIndex:\n")
##
## Index:
head(index(xtsD1))
## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"
cat("\nPeriodicity:\n")
##
## Periodicity:
periodicity(xtsD1)
## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00
cat("\nMonthly OHLC:\n")
##
## Monthly OHLC:
to.monthly(xtsD1)
## xtsD1.Open xtsD1.High xtsD1.Low xtsD1.Close
## Apr 2012 11.929 13.665 10.914 12.589
## May 2012 12.546 18.891 9.489 17.254
## Jun 2012 17.207 22.439 12.995 18.997
## Jul 2012 18.960 21.896 17.883 20.550
cat("\nMonthly Mean:\n")
##
## Monthly Mean:
# Split RUB_sol into calendar-month pieces and average each piece; the
# result is a list with one mean per month.
lapply(X = split(xtsD1, f = "months"), FUN = mean)
## [[1]]
## [1] 12.27673
##
## [[2]]
## [1] 14.09251
##
## [[3]]
## [1] 17.82781
##
## [[4]]
## [1] 19.42347
cat("\nSummary:\n")
##
## Summary:
summary(xtsD2)
## Index MFA_sol
## Min. :2012-04-30 00:15:00 Min. :10.54
## 1st Qu.:2012-05-15 21:11:15 1st Qu.:13.30
## Median :2012-05-31 18:07:30 Median :15.09
## Mean :2012-05-31 18:07:30 Mean :14.92
## 3rd Qu.:2012-06-16 15:03:45 3rd Qu.:16.45
## Max. :2012-07-02 12:00:00 Max. :19.71
cat("\nStart:\n")
##
## Start:
start(xtsD2)
## [1] "2012-04-30 00:15:00 IST"
cat("\nEnds:\n")
##
## Ends:
end(xtsD2)
## [1] "2012-07-02 12:00:00 IST"
cat("\nFreq:\n")
##
## Freq:
frequency(xtsD2)
## [1] 0.001111111
cat("\nIndex:\n")
##
## Index:
head(index(xtsD2))
## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"
cat("\nPeriodicity:\n")
##
## Periodicity:
periodicity(xtsD2)
## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00
cat("\nMonthly OHLC:\n")
##
## Monthly OHLC:
to.monthly(xtsD2)
## xtsD2.Open xtsD2.High xtsD2.Low xtsD2.Close
## Apr 2012 12.689 13.855 11.579 12.769
## May 2012 12.701 17.203 10.541 15.834
## Jun 2012 15.796 19.709 13.883 16.295
## Jul 2012 16.252 18.691 15.603 17.819
cat("\nMonthly Mean:\n")
##
## Monthly Mean:
# Split MFA_sol into calendar-month pieces and average each piece; the
# result is a list with one mean per month.
lapply(X = split(xtsD2, f = "months"), FUN = mean)
## [[1]]
## [1] 12.69096
##
## [[2]]
## [1] 13.41241
##
## [[3]]
## [1] 16.45846
##
## [[4]]
## [1] 16.94893
Plot xts
# Line plot of the RUB_sol series.
# `ts.colour` is not an argument of autoplot() for xts/zoo objects; it was
# ignored with an "unknown parameters" warning and the line was never blue.
# The colour is therefore set with an explicit geom_line() layer, which
# inherits the data and x/y mapping from the plot built by autoplot().
autoplot(xtsD1) +
  geom_line(colour = "blue") +
  labs(title = "Times Series Plot", x = "Month", y = "RUB_sol")
# Line plot of the MFA_sol series.
# `ts.colour` is not an argument of autoplot() for xts/zoo objects; it was
# ignored with an "unknown parameters" warning and the line was never blue.
# The colour is therefore set with an explicit geom_line() layer, which
# inherits the data and x/y mapping from the plot built by autoplot().
autoplot(xtsD2) +
  geom_line(colour = "blue") +
  labs(title = "Times Series Plot", x = "Month", y = "MFA_sol")
ADF Test
# Augmented Dickey-Fuller test on RUB_sol against the "stationary"
# alternative. k = 0 fixes the lag order at 0 (reported as "Lag order = 0").
adf.test(x = xtsD1, alternative = "stationary", k = 0)
##
## Augmented Dickey-Fuller Test
##
## data: xtsD1
## Dickey-Fuller = -1.9731, Lag order = 0, p-value = 0.5898
## alternative hypothesis: stationary
Observation: The p-value (0.5898) is greater than 0.05, so the null hypothesis of a unit root cannot be rejected; the data is treated as non-stationary.
ADF TEST
# Augmented Dickey-Fuller test on RUB_sol against the "stationary"
# alternative. k = 0 fixes the lag order at 0 (reported as "Lag order = 0").
adf.test(x = xtsD1, alternative = "stationary", k = 0)
##
## Augmented Dickey-Fuller Test
##
## data: xtsD1
## Dickey-Fuller = -1.9731, Lag order = 0, p-value = 0.5898
## alternative hypothesis: stationary
The p-value is greater than the printed p-value.
Observation: The p-value is greater than 0.05, so the null hypothesis of a unit root cannot be rejected; the data is treated as non-stationary.
Plot ACF
# Base-graphics autocorrelation plot of the log-transformed RUB_sol series.
acf(x = log(xtsD1))
# Autocorrelation of the untransformed series, computed without plotting
# and then drawn through autoplot().
autoplot(object = acf(x = xtsD1, plot = FALSE))
Plot PACF
# Partial autocorrelation of RUB_sol, computed without plotting and then
# drawn through autoplot().
autoplot(object = pacf(x = xtsD1, plot = FALSE))
Make ARIMA Model
# Let auto.arima() pick an ARIMA specification for RUB_sol, then show the
# selected order, coefficients and information criteria.
armModel <- auto.arima(y = xtsD1)
print(armModel)
## Series: xtsD1
## ARIMA(5,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1
## 1.9571 -1.0185 0.1645 -0.1499 0.0409 -0.9766
## s.e. 0.0131 0.0282 0.0309 0.0282 0.0130 0.0029
##
## sigma^2 estimated as 0.0003703: log likelihood=15431.69
## AIC=-30849.37 AICc=-30849.36 BIC=-30802.37
Forecast Using ARIMA Model
# Forecast the next 30 observations from the fitted RUB_sol model and print
# the point forecasts with their 80% and 95% intervals.
# NOTE(review): the forecast rows are labelled with raw index numbers, not
# timestamps -- confirm whether a regular ts with a known frequency is wanted.
fcData <- forecast(object = armModel, h = 30)
print(fcData)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 5486401 20.75225 20.72759 20.77691 20.71453 20.78996
## 5487301 20.94708 20.89236 21.00179 20.86340 21.03075
## 5488201 21.13238 21.04270 21.22205 20.99523 21.26953
## 5489101 21.30777 21.17789 21.43764 21.10914 21.50640
## 5490001 21.47257 21.29815 21.64698 21.20582 21.73931
## 5490901 21.62601 21.40354 21.84849 21.28577 21.96626
## 5491801 21.76750 21.49399 22.04101 21.34920 22.18580
## 5492701 21.89651 21.56949 22.22354 21.39637 22.39666
## 5493601 22.01261 21.63006 22.39516 21.42755 22.59767
## 5494501 22.11543 21.67577 22.55508 21.44303 22.78782
## 5495401 22.20469 21.70674 22.70263 21.44315 22.96623
## 5496301 22.28020 21.72316 22.83724 21.42828 23.13212
## 5497201 22.34186 21.72526 22.95846 21.39886 23.28486
## 5498101 22.38964 21.71336 23.06591 21.35537 23.42390
## 5499001 22.42358 21.68783 23.15933 21.29835 23.54882
## 5499901 22.44382 21.64908 23.23857 21.22837 23.65928
## 5500801 22.45057 21.59760 23.30354 21.14607 23.75508
## 5501701 22.44410 21.53393 23.35427 21.05211 23.83609
## 5502601 22.42475 21.45864 23.39087 20.94721 23.90230
## 5503501 22.39294 21.37236 23.41352 20.83210 23.95378
## 5504401 22.34914 21.27577 23.42251 20.70756 23.99072
## 5505301 22.29388 21.16957 23.41818 20.57440 24.01336
## 5506201 22.22773 21.05449 23.40097 20.43342 24.02205
## 5507101 22.15134 20.93131 23.37137 20.28547 24.01721
## 5508001 22.06537 20.80082 23.32993 20.13140 23.99935
## 5508901 21.97055 20.66381 23.27729 19.97206 23.96903
## 5509801 21.86760 20.52110 23.21411 19.80831 23.92690
## 5510701 21.75732 20.37353 23.14112 19.64100 23.87365
## 5511601 21.64050 20.22193 23.05908 19.47098 23.81003
## 5512501 21.51796 20.06711 22.96882 19.29908 23.73685
Observation: The forecast using the ARIMA model was successfully created.
Plot Forecast Using ARIMA Model
# Plot the forecast object held in fcData.
autoplot(object = fcData)
Observation: The forecast values are reflected in the graph.
ADF Test for XTSD2
# Augmented Dickey-Fuller test on MFA_sol against the "stationary"
# alternative. k = 0 fixes the lag order at 0 (reported as "Lag order = 0").
adf.test(x = xtsD2, alternative = "stationary", k = 0)
##
## Augmented Dickey-Fuller Test
##
## data: xtsD2
## Dickey-Fuller = -2.0628, Lag order = 0, p-value = 0.5517
## alternative hypothesis: stationary
Plot ACF for XTSD2
# Autocorrelation of the untransformed MFA_sol series, computed without
# plotting and then drawn through autoplot().
autoplot(object = acf(x = xtsD2, plot = FALSE))
# Base-graphics autocorrelation plot of the log-transformed series
# (log() needs strictly positive values).
acf(x = log(xtsD2))
Make ARIMA Model for XTSD2
# Let auto.arima() pick an ARIMA specification for MFA_sol, then show the
# selected order, coefficients and information criteria. This overwrites
# the armModel fitted earlier for RUB_sol.
# NOTE(review): the printed fit reports NaN standard errors for most
# coefficients -- treat this model with caution.
armModel <- auto.arima(y = xtsD2)
print(armModel)
## Series: xtsD2
## ARIMA(4,1,4)
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 0.4856 0.7682 -0.3657 -0.008 0.1278 -0.4095 0.2634 0.1029
## s.e. NaN NaN NaN NaN NaN 0.0508 NaN NaN
##
## sigma^2 estimated as 0.0009886: log likelihood=12441.01
## AIC=-24864.02 AICc=-24863.99 BIC=-24803.58
Forecast Using ARIMA Model for XTSD2
# Forecast the next 30 observations from the fitted MFA_sol model and print
# the point forecasts with their 80% and 95% intervals. This overwrites the
# fcData produced earlier for RUB_sol.
fcData <- forecast(object = armModel, h = 30)
print(fcData)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 5486401 17.89291 17.85261 17.93320 17.83128 17.95453
## 5487301 17.96065 17.88416 18.03713 17.84367 18.07762
## 5488201 18.02405 17.90482 18.14328 17.84170 18.20640
## 5489101 18.08410 17.91551 18.25269 17.82627 18.34193
## 5490001 18.13661 17.91244 18.36078 17.79377 18.47945
## 5490901 18.18452 17.90132 18.46771 17.75141 18.61763
## 5491801 18.22566 17.88072 18.57060 17.69812 18.75320
## 5492701 18.26277 17.85509 18.67045 17.63927 18.88626
## 5493601 18.29446 17.82336 18.76555 17.57398 19.01493
## 5494501 18.32293 17.78872 18.85713 17.50593 19.13992
## 5495401 18.34720 17.75028 18.94412 17.43429 19.26012
## 5496301 18.36898 17.71028 19.02767 17.36159 19.37636
## 5497201 18.38754 17.66800 19.10708 17.28710 19.48798
## 5498101 18.40418 17.62504 19.18332 17.21258 19.59577
## 5499001 18.41836 17.58079 19.25593 17.13741 19.69931
## 5499901 18.43107 17.53642 19.32573 17.06282 19.79933
## 5500801 18.44191 17.49144 19.39238 16.98829 19.89553
## 5501701 18.45162 17.44667 19.45656 16.91469 19.98854
## 5502601 18.45990 17.40175 19.51804 16.84160 20.07819
## 5503501 18.46731 17.35727 19.57736 16.76965 20.16498
## 5504401 18.47364 17.31292 19.63436 16.69847 20.24880
## 5505301 18.47930 17.26913 19.68947 16.62851 20.33009
## 5506201 18.48413 17.22567 19.74259 16.55948 20.40878
## 5507101 18.48846 17.18285 19.79407 16.49170 20.48522
## 5508001 18.49215 17.14046 19.84383 16.42492 20.55937
## 5508901 18.49545 17.09875 19.89216 16.35938 20.63153
## 5509801 18.49827 17.05754 19.93900 16.29487 20.70168
## 5510701 18.50079 17.01701 19.98458 16.23154 20.77005
## 5511601 18.50295 16.97702 20.02887 16.16925 20.83665
## 5512501 18.50487 16.93770 20.07205 16.10808 20.90167
Observation: The forecast using the ARIMA model was successfully created.
Plot Forecast Using ARIMA Model for XTSD2
# Plot the forecast object held in fcData.
autoplot(object = fcData)
Observation The forecast values are refecleted in the graph.