Problem Definition
Forecast the next 30 data points for each of the columns RUB_sol and MFA_sol.
Data Location
The data is in the file ./data/xtsdata.csv.
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tseries)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(forecast)
library(quantmod)
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggfortify)
##
## Attaching package: 'ggfortify'
## The following object is masked from 'package:forecast':
##
## gglagplot
# Load the raw readings from CSV. Strings are kept as character (not factor)
# so the time15 column can be parsed into date-times afterwards.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
dfrdata <- read.csv(
  "./data/xtsdata.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Show the first rows to check the column layout.
head(dfrdata)
## time15 RUB_sol MFA_sol NFA_sol NFY_sol SFY_baro_air NFA_baro_air
## 1 4/30/2012 0:15 11.929 12.689 11.26 8.19 9.43 13.134
## 2 4/30/2012 0:30 11.879 12.627 11.28 8.18 9.25 12.925
## 3 4/30/2012 0:45 11.828 12.570 11.26 8.21 9.03 12.736
## 4 4/30/2012 1:00 11.779 12.511 11.23 8.22 8.82 12.605
## 5 4/30/2012 1:15 11.730 12.459 11.17 8.23 8.60 12.455
## 6 4/30/2012 1:30 11.682 12.397 11.15 8.24 8.42 12.335
cat("\nClass:\n")
##
## Class:
class(dfrdata)
## [1] "data.frame"
#nrow(dfrdata)
# Parse the "month/day/year hour:minute" strings into date-times.
# POSIXct (a single numeric vector) is used rather than POSIXlt (a list of
# date-time components), which is the wrong shape for a data frame column.
# No tz is given, so the session time zone applies (the printed output in
# this report shows IST).
dfrdata$time15 <- as.POSIXct(dfrdata$time15, format = "%m/%d/%Y %H:%M")
# Fail fast if any timestamp did not match the format: unparsed values
# become NA and could not be used as a time index.
stopifnot(!anyNA(dfrdata$time15))
class(dfrdata)
## [1] "data.frame"
# Build a one-column xts series of RUB_sol, indexed by the parsed timestamps.
xtsD1 <- xts(dfrdata[["RUB_sol"]], order.by = dfrdata[["time15"]])
# Label the single data column so printed output is headed "RUB_sol".
colnames(xtsD1) <- "RUB_sol"
class(xtsD1)
## [1] "xts" "zoo"
head(xtsD1)
## RUB_sol
## 2012-04-30 00:15:00 11.929
## 2012-04-30 00:30:00 11.879
## 2012-04-30 00:45:00 11.828
## 2012-04-30 01:00:00 11.779
## 2012-04-30 01:15:00 11.730
## 2012-04-30 01:30:00 11.682
# Build a one-column xts series of MFA_sol, indexed by the parsed timestamps.
xtsD2 <- xts(dfrdata[["MFA_sol"]], order.by = dfrdata[["time15"]])
# Label the single data column so printed output is headed "MFA_sol".
colnames(xtsD2) <- "MFA_sol"
class(xtsD2)
## [1] "xts" "zoo"
head(xtsD2)
## MFA_sol
## 2012-04-30 00:15:00 12.689
## 2012-04-30 00:30:00 12.627
## 2012-04-30 00:45:00 12.570
## 2012-04-30 01:00:00 12.511
## 2012-04-30 01:15:00 12.459
## 2012-04-30 01:30:00 12.397
# ---- Exploratory summary of the RUB_sol series (xtsD1) ----
cat("\n")
cat("\nSummary:\n")
##
## Summary:
# Summary statistics for both the time index and the RUB_sol values.
summary(xtsD1)
## Index RUB_sol
## Min. :2012-04-30 00:15:00 Min. : 9.489
## 1st Qu.:2012-05-15 21:11:15 1st Qu.:13.906
## Median :2012-05-31 18:07:30 Median :16.043
## Mean :2012-05-31 18:07:30 Mean :15.956
## 3rd Qu.:2012-06-16 15:03:45 3rd Qu.:17.971
## Max. :2012-07-02 12:00:00 Max. :22.439
cat("\nStart:\n")
##
## Start:
# First timestamp in the index.
start(xtsD1)
## [1] "2012-04-30 00:15:00 IST"
cat("\nEnds:\n")
##
## Ends:
# Last timestamp in the index.
end(xtsD1)
## [1] "2012-07-02 12:00:00 IST"
cat("\nFreq:\n")
##
## Freq:
# The reported value 0.001111111 equals 1/900, i.e. one observation per
# 900 seconds, consistent with the 15-minute periodicity printed below.
# It is not a seasonal period (observations per cycle).
frequency(xtsD1)
## [1] 0.001111111
cat("\nIndex:\n")
##
## Index:
# Peek at the first few timestamps of the index.
head(index(xtsD1))
## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"
cat("\nPeriodicity:\n")
##
## Periodicity:
# Sampling interval and covered date range, as estimated from the index.
periodicity(xtsD1)
## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00
cat("\nMonthly OHLC:\n")
##
## Monthly OHLC:
# Collapse the series to one open/high/low/close row per month. The column
# names in the output (xtsD1.Open, ...) are derived from the variable name.
to.monthly(xtsD1)
## xtsD1.Open xtsD1.High xtsD1.Low xtsD1.Close
## Apr 2012 11.929 13.665 10.914 12.589
## May 2012 12.546 18.891 9.489 17.254
## Jun 2012 17.207 22.439 12.995 18.997
## Jul 2012 18.960 21.896 17.883 20.550
cat("\nMonthly Mean:\n")
##
## Monthly Mean:
# Split the series into monthly pieces and average each piece. The result
# is an unnamed list; presumably the elements follow the same month order
# as the OHLC table above (Apr, May, Jun, Jul 2012) — TODO confirm.
lapply(split(xtsD1,f="months"),FUN=mean)
## [[1]]
## [1] 12.27673
##
## [[2]]
## [1] 14.09251
##
## [[3]]
## [1] 17.82781
##
## [[4]]
## [1] 19.42347
# ---- Exploratory summary of the MFA_sol series (xtsD2) ----
cat("\nSummary:\n")
##
## Summary:
# Summary statistics for both the time index and the MFA_sol values.
summary(xtsD2)
## Index MFA_sol
## Min. :2012-04-30 00:15:00 Min. :10.54
## 1st Qu.:2012-05-15 21:11:15 1st Qu.:13.30
## Median :2012-05-31 18:07:30 Median :15.09
## Mean :2012-05-31 18:07:30 Mean :14.92
## 3rd Qu.:2012-06-16 15:03:45 3rd Qu.:16.45
## Max. :2012-07-02 12:00:00 Max. :19.71
cat("\nStart:\n")
##
## Start:
# First timestamp in the index.
start(xtsD2)
## [1] "2012-04-30 00:15:00 IST"
cat("\nEnds:\n")
##
## Ends:
# Last timestamp in the index.
end(xtsD2)
## [1] "2012-07-02 12:00:00 IST"
cat("\nFreq:\n")
##
## Freq:
# The reported value 0.001111111 equals 1/900, i.e. one observation per
# 900 seconds, consistent with the 15-minute periodicity printed below.
# It is not a seasonal period (observations per cycle).
frequency(xtsD2)
## [1] 0.001111111
cat("\nIndex:\n")
##
## Index:
# Peek at the first few timestamps of the index.
head(index(xtsD2))
## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"
cat("\nPeriodicity:\n")
##
## Periodicity:
# Sampling interval and covered date range, as estimated from the index.
periodicity(xtsD2)
## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00
cat("\nMonthly OHLC:\n")
##
## Monthly OHLC:
# Collapse the series to one open/high/low/close row per month. The column
# names in the output (xtsD2.Open, ...) are derived from the variable name.
to.monthly(xtsD2)
## xtsD2.Open xtsD2.High xtsD2.Low xtsD2.Close
## Apr 2012 12.689 13.855 11.579 12.769
## May 2012 12.701 17.203 10.541 15.834
## Jun 2012 15.796 19.709 13.883 16.295
## Jul 2012 16.252 18.691 15.603 17.819
cat("\nMonthly Mean:\n")
##
## Monthly Mean:
# Split the series into monthly pieces and average each piece. The result
# is an unnamed list; presumably the elements follow the same month order
# as the OHLC table above (Apr, May, Jun, Jul 2012) — TODO confirm.
lapply(split(xtsD2,f="months"),FUN=mean)
## [[1]]
## [1] 12.69096
##
## [[2]]
## [1] 13.41241
##
## [[3]]
## [1] 16.45846
##
## [[4]]
## [1] 16.94893
# Line plot of the RUB_sol series over time, drawn in blue; the title and
# both axis labels are set in a single labs() call.
autoplot(xtsD1, ts.colour = "blue") +
  labs(title = "Times Series Plot", x = "Month", y = "RUB_sol")
Observation: The graph shows the fluctuations present in the data.
No clear trend stands out in this time series plot, although the monthly means printed above rise from about 12.3 to 19.4 over the period.
The series is not stationary.
# Line plot of the MFA_sol series over time, drawn in blue; the title and
# both axis labels are set in a single labs() call.
autoplot(xtsD2, ts.colour = "blue") +
  labs(title = "Times Series Plot", x = "Month", y = "MFA_sol")
Observation: The graph shows the fluctuations present in the data.
No clear trend stands out in this time series plot, although the monthly means printed above rise from about 12.7 to 16.9 over the period.
The series is not stationary.
# decompose data
#autoplot(stl(xtsData, s.window = 'periodic'), ts.colour = 'blue')
Observation: Since the data above shows no cycles, it cannot be decomposed.
ADF Test for RUB_sol
# Augmented Dickey-Fuller Test
# Tests the RUB_sol series against the alternative hypothesis that it is
# stationary. The "data:" label in the printed result is taken from the
# argument expression, so the variable name appears in the output.
# NOTE(review): k=0 fixes the lag order at 0 (the output reports
# "Lag order = 0"), so the regression presumably includes no lagged
# difference terms — confirm this is intended rather than the function's
# default lag order.
adf.test(xtsD1, alternative="stationary", k=0)
##
## Augmented Dickey-Fuller Test
##
## data: xtsD1
## Dickey-Fuller = -1.9731, Lag order = 0, p-value = 0.5898
## alternative hypothesis: stationary
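Observation: To reject the null hypothesis of non-stationarity, the p-value should be less than 0.05.
Here it is 0.5898, which is greater than 0.05, so the null hypothesis cannot be rejected and the data is treated as non-stationary.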
Plot ACF for RUB_sol
# Disabled conversion to a plain ts object; `xtsData` is not defined in the
# visible part of this script.
#tsData <-as.ts(xtsData)
# Base-graphics autocorrelation plot of the log-transformed RUB_sol series.
# NOTE(review): this call uses log(xtsD1) while the autoplot() version below
# uses the untransformed series — confirm the difference is intentional.
acf(log(xtsD1))
# Auto Correlation Function
# plot = FALSE makes acf() return its result without drawing; autoplot()
# then renders that result as a ggplot.
autoplot(acf(xtsD1, plot = FALSE))
Plot PACF for RUB_sol
# Disabled alternative: ACF of the differenced log series.
#acf(diff(log(xtsD1)))
# Partial autocorrelation of the untransformed RUB_sol series; plot = FALSE
# returns the result without drawing so that autoplot() can render it.
autoplot(pacf(xtsD1, plot = FALSE))
Make ARIMA Model for RUB_sol
# get arima model (find best model)
# auto.arima() chooses the ARIMA order itself; the fit printed below is
# ARIMA(5,1,1). The "Series:" label in the printout is taken from the
# argument expression, so the variable name appears in the output.
# NOTE(review): the series is passed as an xts object with no seasonal
# period set (frequency() printed 0.001111111 earlier), so presumably only
# non-seasonal models are considered — confirm that is intended.
armModel <- auto.arima(xtsD1)
# Print the fitted order, coefficients and information criteria.
armModel
## Series: xtsD1
## ARIMA(5,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1
## 1.9571 -1.0185 0.1645 -0.1499 0.0409 -0.9766
## s.e. 0.0131 0.0282 0.0309 0.0282 0.0130 0.0029
##
## sigma^2 estimated as 0.0003703: log likelihood=15431.69
## AIC=-30849.37 AICc=-30849.36 BIC=-30802.37
Forecast Using ARIMA Model for RUB_sol
# Forecast the next 30 steps (h=30) from the ARIMA model fitted to RUB_sol.
fcData <- forecast(armModel,h=30)
# Print the point forecasts with their 80% and 95% interval bounds.
# NOTE(review): the row labels in the printed table are large integers, not
# timestamps — presumably the raw numeric time index; confirm before
# reporting dates for these forecasts.
fcData
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 5486401 20.75225 20.72759 20.77691 20.71453 20.78996
## 5487301 20.94708 20.89236 21.00179 20.86340 21.03075
## 5488201 21.13238 21.04270 21.22205 20.99523 21.26953
## 5489101 21.30777 21.17789 21.43764 21.10914 21.50640
## 5490001 21.47257 21.29815 21.64698 21.20582 21.73931
## 5490901 21.62601 21.40354 21.84849 21.28577 21.96626
## 5491801 21.76750 21.49399 22.04101 21.34920 22.18580
## 5492701 21.89651 21.56949 22.22354 21.39637 22.39666
## 5493601 22.01261 21.63006 22.39516 21.42755 22.59767
## 5494501 22.11543 21.67577 22.55508 21.44303 22.78782
## 5495401 22.20469 21.70674 22.70263 21.44315 22.96623
## 5496301 22.28020 21.72316 22.83724 21.42828 23.13212
## 5497201 22.34186 21.72526 22.95846 21.39886 23.28486
## 5498101 22.38964 21.71336 23.06591 21.35537 23.42390
## 5499001 22.42358 21.68783 23.15933 21.29835 23.54882
## 5499901 22.44382 21.64908 23.23857 21.22837 23.65928
## 5500801 22.45057 21.59760 23.30354 21.14607 23.75508
## 5501701 22.44410 21.53393 23.35427 21.05211 23.83609
## 5502601 22.42475 21.45864 23.39087 20.94721 23.90230
## 5503501 22.39294 21.37236 23.41352 20.83210 23.95378
## 5504401 22.34914 21.27577 23.42251 20.70756 23.99072
## 5505301 22.29388 21.16957 23.41818 20.57440 24.01336
## 5506201 22.22773 21.05449 23.40097 20.43342 24.02205
## 5507101 22.15134 20.93131 23.37137 20.28547 24.01721
## 5508001 22.06537 20.80082 23.32993 20.13140 23.99935
## 5508901 21.97055 20.66381 23.27729 19.97206 23.96903
## 5509801 21.86760 20.52110 23.21411 19.80831 23.92690
## 5510701 21.75732 20.37353 23.14112 19.64100 23.87365
## 5511601 21.64050 20.22193 23.05908 19.47098 23.81003
## 5512501 21.51796 20.06711 22.96882 19.29908 23.73685
Plot Forecast Using ARIMA Model for RUB_sol
# Plot the RUB_sol forecast object created above.
autoplot(fcData)
ADF Test for MFA_sol
# Augmented Dickey-Fuller Test
# Tests the MFA_sol series against the alternative hypothesis that it is
# stationary. The "data:" label in the printed result is taken from the
# argument expression, so the variable name appears in the output.
# NOTE(review): k=0 fixes the lag order at 0 (the output reports
# "Lag order = 0"), so the regression presumably includes no lagged
# difference terms — confirm this is intended rather than the function's
# default lag order.
adf.test(xtsD2, alternative="stationary", k=0)
##
## Augmented Dickey-Fuller Test
##
## data: xtsD2
## Dickey-Fuller = -2.0628, Lag order = 0, p-value = 0.5517
## alternative hypothesis: stationary
Plot ACF for MFA_sol
#Plots should be greater than zero
# NOTE(review): the remark above is ambiguous — presumably it means the
# values must be positive for the log() call further down; confirm.
# Auto Correlation Function
# plot = FALSE makes acf() return its result without drawing; autoplot()
# then renders that result as a ggplot.
autoplot(acf(xtsD2, plot = FALSE))
#plot in base
# Disabled conversion to a plain ts object; `xtsData` is not defined in the
# visible part of this script.
#tsData <- as.ts(xtsData)
# Base-graphics autocorrelation plot of the log-transformed MFA_sol series
# (the autoplot() call above uses the untransformed series).
acf(log(xtsD2))
Make ARIMA Model for MFA_sol
# get arima model (find best model)
# auto.arima() chooses the ARIMA order itself; the fit printed below is
# ARIMA(4,1,4). This assignment reuses the name armModel, replacing the
# RUB_sol model fitted earlier in the script.
armModel <- auto.arima(xtsD2)
# Print the fitted order, coefficients and information criteria.
# NOTE(review): the printout below carries a "NaNs produced" warning and NaN
# standard errors for most coefficients — treat this fit with caution.
armModel
## Series: xtsD2
## ARIMA(4,1,4)
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 0.4856 0.7682 -0.3657 -0.008 0.1278 -0.4095 0.2634 0.1029
## s.e. NaN NaN NaN NaN NaN 0.0508 NaN NaN
##
## sigma^2 estimated as 0.0009886: log likelihood=12441.01
## AIC=-24864.02 AICc=-24863.99 BIC=-24803.58
Forecast Using ARIMA Model for MFA_sol
# Forecast the next 30 steps (h=30) from the ARIMA model fitted to MFA_sol.
# This assignment reuses the name fcData, replacing the RUB_sol forecast
# created earlier in the script.
fcData <- forecast(armModel,h=30)
# Print the point forecasts with their 80% and 95% interval bounds.
# NOTE(review): the row labels in the printed table are large integers, not
# timestamps — presumably the raw numeric time index; confirm before
# reporting dates for these forecasts.
fcData
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 5486401 17.89291 17.85261 17.93320 17.83128 17.95453
## 5487301 17.96065 17.88416 18.03713 17.84367 18.07762
## 5488201 18.02405 17.90482 18.14328 17.84170 18.20640
## 5489101 18.08410 17.91551 18.25269 17.82627 18.34193
## 5490001 18.13661 17.91244 18.36078 17.79377 18.47945
## 5490901 18.18452 17.90132 18.46771 17.75141 18.61763
## 5491801 18.22566 17.88072 18.57060 17.69812 18.75320
## 5492701 18.26277 17.85509 18.67045 17.63927 18.88626
## 5493601 18.29446 17.82336 18.76555 17.57398 19.01493
## 5494501 18.32293 17.78872 18.85713 17.50593 19.13992
## 5495401 18.34720 17.75028 18.94412 17.43429 19.26012
## 5496301 18.36898 17.71028 19.02767 17.36159 19.37636
## 5497201 18.38754 17.66800 19.10708 17.28710 19.48798
## 5498101 18.40418 17.62504 19.18332 17.21258 19.59577
## 5499001 18.41836 17.58079 19.25593 17.13741 19.69931
## 5499901 18.43107 17.53642 19.32573 17.06282 19.79933
## 5500801 18.44191 17.49144 19.39238 16.98829 19.89553
## 5501701 18.45162 17.44667 19.45656 16.91469 19.98854
## 5502601 18.45990 17.40175 19.51804 16.84160 20.07819
## 5503501 18.46731 17.35727 19.57736 16.76965 20.16498
## 5504401 18.47364 17.31292 19.63436 16.69847 20.24880
## 5505301 18.47930 17.26913 19.68947 16.62851 20.33009
## 5506201 18.48413 17.22567 19.74259 16.55948 20.40878
## 5507101 18.48846 17.18285 19.79407 16.49170 20.48522
## 5508001 18.49215 17.14046 19.84383 16.42492 20.55937
## 5508901 18.49545 17.09875 19.89216 16.35938 20.63153
## 5509801 18.49827 17.05754 19.93900 16.29487 20.70168
## 5510701 18.50079 17.01701 19.98458 16.23154 20.77005
## 5511601 18.50295 16.97702 20.02887 16.16925 20.83665
## 5512501 18.50487 16.93770 20.07205 16.10808 20.90167
Plot Forecast Using ARIMA Model for MFA_sol
# Plot the MFA_sol forecast object created above.
autoplot(fcData)