Set up
Load Libraries
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
library(tseries)
## Warning: package 'tseries' was built under R version 3.3.3
library(xts)
## Warning: package 'xts' was built under R version 3.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.3.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
library(forecast)
## Warning: package 'forecast' was built under R version 3.3.3
library(quantmod)
## Warning: package 'quantmod' was built under R version 3.3.3
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.3.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.3.3
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 3.3.3
##
## Attaching package: 'ggfortify'
## The following object is masked from 'package:forecast':
##
## gglagplot
Read Data
# Load the observations from xtsdata.csv. Strings are kept as character so
# that time15 can be parsed explicitly below. TRUE/FALSE are spelled out
# because T and F are ordinary variables that can be reassigned.
dfrdata <- read.csv("xtsdata.csv", header = TRUE, stringsAsFactors = FALSE)
head(dfrdata)
## time15 RUB_sol MFA_sol NFA_sol NFY_sol SFY_baro_air NFA_baro_air
## 1 4/30/2012 0:15 11.929 12.689 11.26 8.19 9.43 13.134
## 2 4/30/2012 0:30 11.879 12.627 11.28 8.18 9.25 12.925
## 3 4/30/2012 0:45 11.828 12.570 11.26 8.21 9.03 12.736
## 4 4/30/2012 1:00 11.779 12.511 11.23 8.22 8.82 12.605
## 5 4/30/2012 1:15 11.730 12.459 11.17 8.23 8.60 12.455
## 6 4/30/2012 1:30 11.682 12.397 11.15 8.24 8.42 12.335
cat("\nClass:\n")
##
## Class:
class(dfrdata)
## [1] "data.frame"
#nrow(dfrdata)
# Parse the "m/d/Y H:M" text into POSIXct. POSIXct is a plain numeric vector,
# which is the appropriate date-time class for a data-frame column; POSIXlt
# is a list of components and xts converts it to POSIXct anyway.
# No tz is given, so the session's local time zone is used.
dfrdata$time15 <- as.POSIXct(dfrdata$time15, format = "%m/%d/%Y %H:%M")
# A timestamp that does not match the format parses to NA; stop here with a
# clear failure instead of passing NA index values on to xts().
stopifnot(!anyNA(dfrdata$time15))
class(dfrdata)
## [1] "data.frame"
# Build a one-column xts series of RUB_sol indexed by the parsed timestamps,
# then label the column and inspect the result.
xtsR <- xts(dfrdata$RUB_sol, order.by = dfrdata$time15)
colnames(xtsR) <- "RUB_sol"
class(xtsR)
## [1] "xts" "zoo"
head(xtsR)
## RUB_sol
## 2012-04-30 00:15:00 11.929
## 2012-04-30 00:30:00 11.879
## 2012-04-30 00:45:00 11.828
## 2012-04-30 01:00:00 11.779
## 2012-04-30 01:15:00 11.730
## 2012-04-30 01:30:00 11.682
# Build a one-column xts series of MFA_sol indexed by the parsed timestamps,
# then label the column and inspect the result.
xtsM <- xts(dfrdata$MFA_sol, order.by = dfrdata$time15)
colnames(xtsM) <- "MFA_sol"
class(xtsM)
## [1] "xts" "zoo"
head(xtsM)
## MFA_sol
## 2012-04-30 00:15:00 12.689
## 2012-04-30 00:30:00 12.627
## 2012-04-30 00:45:00 12.570
## 2012-04-30 01:00:00 12.511
## 2012-04-30 01:15:00 12.459
## 2012-04-30 01:30:00 12.397
Observation: The time15 column is converted into a proper date-time format. The RUB_sol column is extracted and converted into the xts object xtsR, and the MFA_sol column is extracted and converted into the xts object xtsM.
xtsR info
# Descriptive overview of the RUB_sol series. Each cat() call prints the
# heading for the statement that follows it.
cat("\n")
cat("\nSummary:\n")
##
## Summary:
summary(xtsR)
## Index RUB_sol
## Min. :2012-04-30 00:15:00 Min. : 9.489
## 1st Qu.:2012-05-15 21:11:15 1st Qu.:13.906
## Median :2012-05-31 18:07:30 Median :16.043
## Mean :2012-05-31 18:07:30 Mean :15.956
## 3rd Qu.:2012-06-16 15:03:45 3rd Qu.:17.971
## Max. :2012-07-02 12:00:00 Max. :22.439
# First and last timestamps of the series.
cat("\nStart:\n")
##
## Start:
start(xtsR)
## [1] "2012-04-30 00:15:00 IST"
cat("\nEnds:\n")
##
## Ends:
end(xtsR)
## [1] "2012-07-02 12:00:00 IST"
# Sampling information: frequency, leading index values, and periodicity.
cat("\nFreq:\n")
##
## Freq:
frequency(xtsR)
## [1] 0.001111111
cat("\nIndex:\n")
##
## Index:
head(index(xtsR))
## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"
cat("\nPeriodicity:\n")
##
## Periodicity:
periodicity(xtsR)
## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00
# Monthly aggregates: open/high/low/close bars, then the mean of each month.
cat("\nMonthly OHLC:\n")
##
## Monthly OHLC:
to.monthly(xtsR)
## xtsR.Open xtsR.High xtsR.Low xtsR.Close
## Apr 2012 11.929 13.665 10.914 12.589
## May 2012 12.546 18.891 9.489 17.254
## Jun 2012 17.207 22.439 12.995 18.997
## Jul 2012 18.960 21.896 17.883 20.550
cat("\nMonthly Mean:\n")
##
## Monthly Mean:
lapply(split(xtsR, f = "months"), mean)
## [[1]]
## [1] 12.27673
##
## [[2]]
## [1] 14.09251
##
## [[3]]
## [1] 17.82781
##
## [[4]]
## [1] 19.42347
xtsM info
# Descriptive overview of the MFA_sol series. Each cat() call prints the
# heading for the statement that follows it.
cat("\nSummary:\n")
##
## Summary:
summary(xtsM)
## Index MFA_sol
## Min. :2012-04-30 00:15:00 Min. :10.54
## 1st Qu.:2012-05-15 21:11:15 1st Qu.:13.30
## Median :2012-05-31 18:07:30 Median :15.09
## Mean :2012-05-31 18:07:30 Mean :14.92
## 3rd Qu.:2012-06-16 15:03:45 3rd Qu.:16.45
## Max. :2012-07-02 12:00:00 Max. :19.71
# First and last timestamps of the series.
cat("\nStart:\n")
##
## Start:
start(xtsM)
## [1] "2012-04-30 00:15:00 IST"
cat("\nEnds:\n")
##
## Ends:
end(xtsM)
## [1] "2012-07-02 12:00:00 IST"
# Sampling information: frequency, leading index values, and periodicity.
cat("\nFreq:\n")
##
## Freq:
frequency(xtsM)
## [1] 0.001111111
cat("\nIndex:\n")
##
## Index:
head(index(xtsM))
## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"
cat("\nPeriodicity:\n")
##
## Periodicity:
periodicity(xtsM)
## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00
# Monthly aggregates: open/high/low/close bars, then the mean of each month.
cat("\nMonthly OHLC:\n")
##
## Monthly OHLC:
to.monthly(xtsM)
## xtsM.Open xtsM.High xtsM.Low xtsM.Close
## Apr 2012 12.689 13.855 11.579 12.769
## May 2012 12.701 17.203 10.541 15.834
## Jun 2012 15.796 19.709 13.883 16.295
## Jul 2012 16.252 18.691 15.603 17.819
cat("\nMonthly Mean:\n")
##
## Monthly Mean:
lapply(split(xtsM, f = "months"), mean)
## [[1]]
## [1] 12.69096
##
## [[2]]
## [1] 13.41241
##
## [[3]]
## [1] 16.45846
##
## [[4]]
## [1] 16.94893
Plot xtsR
# Line plot of the RUB_sol series in blue; the title and both axis labels
# are set in a single labs() call.
autoplot(xtsR, ts.colour = "blue") +
  labs(
    title = "Times Series Plot",
    x = "Month",
    y = "RUB_sol"
  )
Plot xtsM
# Line plot of the MFA_sol series in blue; the title and both axis labels
# are set in a single labs() call.
autoplot(xtsM, ts.colour = "blue") +
  labs(
    title = "Times Series Plot",
    x = "Month",
    y = "MFA_sol"
  )
Observation: The data above either has no seasonal cycles or covers fewer than two full cycles, so it cannot be decomposed.
ADF Test
# Augmented Dickey-Fuller test on RUB_sol against the "stationary"
# alternative. k = 0 requests lag order 0, as the printed result confirms.
adf.test(xtsR, k = 0, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: xtsR
## Dickey-Fuller = -1.9731, Lag order = 0, p-value = 0.5898
## alternative hypothesis: stationary
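Observation: The printed p-value (0.5898) is greater than the usual 0.05 significance level, so the null hypothesis of a unit root cannot be rejected and the series cannot be treated as stationary. In a real-world analysis, a failed stationarity test like this would argue against using an ARIMA model, but for academic purposes it is used here.
Plot ACF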
# Leftover, disabled conversion; xtsData is not defined in the code shown.
#tsData <-as.ts(xtsData)
# Base-graphics ACF plot of the log-transformed RUB_sol series.
acf(log(xtsR))
# Auto Correlation Function
# ggplot-style ACF plot; note this one uses the untransformed series.
# plot = FALSE stops acf() from drawing, so autoplot() renders the result.
autoplot(acf(xtsR, plot = FALSE))
Observation: As all the autocorrelation values are above 0, we use the ARIMA model.
Plot PACF
# Leftover, disabled ACF of the differenced log series; xtsD1 is not defined
# in the code shown.
#acf(diff(log(xtsD1)))
# Partial autocorrelation of RUB_sol. plot = FALSE stops pacf() from
# drawing, so autoplot() renders the result.
autoplot(pacf(xtsR, plot = FALSE))
Observation: The PACF (partial autocorrelation function) shows the autocorrelation at each lag after removing the effect of the shorter lags.
Make ARIMA Model
# get arima model (find best model)
# auto.arima() selects the ARIMA order for RUB_sol; the printed fit below
# reports ARIMA(5,1,1).
armModel <- auto.arima(xtsR)
# Print the selected model, its coefficients, and the information criteria.
armModel
## Series: xtsR
## ARIMA(5,1,1)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1
## 1.9571 -1.0185 0.1645 -0.1499 0.0409 -0.9766
## s.e. 0.0131 0.0282 0.0309 0.0282 0.0130 0.0029
##
## sigma^2 estimated as 0.0003703: log likelihood=15431.69
## AIC=-30849.37 AICc=-30849.36 BIC=-30802.37
Forecast Using ARIMA Model
# Forecast RUB_sol 30 steps ahead from the fitted model. With 15-minute
# periodicity, 30 steps cover 7.5 hours. Then print the point forecasts with
# their 80% and 95% intervals.
fcData <- forecast(armModel, h = 30)
fcData
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 5486401 20.75225 20.72759 20.77691 20.71453 20.78996
## 5487301 20.94708 20.89236 21.00179 20.86340 21.03075
## 5488201 21.13238 21.04270 21.22205 20.99523 21.26953
## 5489101 21.30777 21.17789 21.43764 21.10914 21.50640
## 5490001 21.47257 21.29815 21.64698 21.20582 21.73931
## 5490901 21.62601 21.40354 21.84849 21.28577 21.96626
## 5491801 21.76750 21.49399 22.04101 21.34920 22.18580
## 5492701 21.89651 21.56949 22.22354 21.39637 22.39666
## 5493601 22.01261 21.63006 22.39516 21.42755 22.59767
## 5494501 22.11543 21.67577 22.55508 21.44303 22.78782
## 5495401 22.20469 21.70674 22.70263 21.44315 22.96623
## 5496301 22.28020 21.72316 22.83724 21.42828 23.13212
## 5497201 22.34186 21.72526 22.95846 21.39886 23.28486
## 5498101 22.38964 21.71336 23.06591 21.35537 23.42390
## 5499001 22.42358 21.68783 23.15933 21.29835 23.54882
## 5499901 22.44382 21.64908 23.23857 21.22837 23.65928
## 5500801 22.45057 21.59760 23.30354 21.14607 23.75508
## 5501701 22.44410 21.53393 23.35427 21.05211 23.83609
## 5502601 22.42475 21.45864 23.39087 20.94721 23.90230
## 5503501 22.39294 21.37236 23.41352 20.83210 23.95378
## 5504401 22.34914 21.27577 23.42251 20.70756 23.99072
## 5505301 22.29388 21.16957 23.41818 20.57440 24.01336
## 5506201 22.22773 21.05449 23.40097 20.43342 24.02205
## 5507101 22.15134 20.93131 23.37137 20.28547 24.01721
## 5508001 22.06537 20.80082 23.32993 20.13140 23.99935
## 5508901 21.97055 20.66381 23.27729 19.97206 23.96903
## 5509801 21.86760 20.52110 23.21411 19.80831 23.92690
## 5510701 21.75732 20.37353 23.14112 19.64100 23.87365
## 5511601 21.64050 20.22193 23.05908 19.47098 23.81003
## 5512501 21.51796 20.06711 22.96882 19.29908 23.73685
Observation: We have to forecast 30 data points; the 30 points are listed above and can be seen visually in the ARIMA forecast graph.
Plot Forecast Using ARIMA Model
# Plot the RUB_sol forecast held in fcData.
autoplot(fcData)
ADF Test for XTSM
# Augmented Dickey-Fuller test on MFA_sol against the "stationary"
# alternative. k = 0 requests lag order 0, as the printed result confirms.
adf.test(xtsM, k = 0, alternative = "stationary")
##
## Augmented Dickey-Fuller Test
##
## data: xtsM
## Dickey-Fuller = -2.0628, Lag order = 0, p-value = 0.5517
## alternative hypothesis: stationary
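Observation: The ADF test is conducted for MFA_sol. The p-value (0.5517) is above 0.05, so the null hypothesis of a unit root cannot be rejected.
Plot ACF for XTSM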
# The autocorrelation values in the plots are expected to be above zero.
# Auto Correlation Function
# ggplot-style ACF plot of the untransformed MFA_sol series. plot = FALSE
# stops acf() from drawing, so autoplot() renders the result.
autoplot(acf(xtsM, plot = FALSE))
# Same check with base graphics; note this one uses the log-transformed series.
# Leftover, disabled conversion; xtsData is not defined in the code shown.
#tsData <- as.ts(xtsData)
acf(log(xtsM))
Observation: The graph above shows all the autocorrelation values above 0, and hence we use the ARIMA model for predictions.
Make ARIMA Model for XTSM
# get arima model (find best model)
# auto.arima() selects the ARIMA order for MFA_sol; the printed fit below
# reports ARIMA(4,1,4).
armModel2 <- auto.arima(xtsM)
# Print the selected model.
# NOTE(review): the printed output shows a "NaNs produced" warning and NaN
# standard errors for all coefficients except ma2, so the coefficient
# uncertainty of this fit is not available; treat the model with caution.
armModel2
## Series: xtsM
## ARIMA(4,1,4)
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 0.4856 0.7682 -0.3657 -0.008 0.1278 -0.4095 0.2634 0.1029
## s.e. NaN NaN NaN NaN NaN 0.0508 NaN NaN
##
## sigma^2 estimated as 0.0009886: log likelihood=12441.01
## AIC=-24864.02 AICc=-24863.99 BIC=-24803.58
Forecast Using ARIMA Model for XTSM
# Forecast MFA_sol 30 steps ahead. With 15-minute periodicity, 30 steps
# cover 7.5 hours.
# The model must be armModel2 (fitted on xtsM). The previous call passed
# armModel, the RUB_sol fit, so fcData2 was just a copy of fcData.
# NOTE: the table printed below was generated by that earlier call and
# repeats the RUB_sol forecast; re-knit the document to regenerate it.
fcData2 <- forecast(armModel2, h = 30)
fcData2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 5486401 20.75225 20.72759 20.77691 20.71453 20.78996
## 5487301 20.94708 20.89236 21.00179 20.86340 21.03075
## 5488201 21.13238 21.04270 21.22205 20.99523 21.26953
## 5489101 21.30777 21.17789 21.43764 21.10914 21.50640
## 5490001 21.47257 21.29815 21.64698 21.20582 21.73931
## 5490901 21.62601 21.40354 21.84849 21.28577 21.96626
## 5491801 21.76750 21.49399 22.04101 21.34920 22.18580
## 5492701 21.89651 21.56949 22.22354 21.39637 22.39666
## 5493601 22.01261 21.63006 22.39516 21.42755 22.59767
## 5494501 22.11543 21.67577 22.55508 21.44303 22.78782
## 5495401 22.20469 21.70674 22.70263 21.44315 22.96623
## 5496301 22.28020 21.72316 22.83724 21.42828 23.13212
## 5497201 22.34186 21.72526 22.95846 21.39886 23.28486
## 5498101 22.38964 21.71336 23.06591 21.35537 23.42390
## 5499001 22.42358 21.68783 23.15933 21.29835 23.54882
## 5499901 22.44382 21.64908 23.23857 21.22837 23.65928
## 5500801 22.45057 21.59760 23.30354 21.14607 23.75508
## 5501701 22.44410 21.53393 23.35427 21.05211 23.83609
## 5502601 22.42475 21.45864 23.39087 20.94721 23.90230
## 5503501 22.39294 21.37236 23.41352 20.83210 23.95378
## 5504401 22.34914 21.27577 23.42251 20.70756 23.99072
## 5505301 22.29388 21.16957 23.41818 20.57440 24.01336
## 5506201 22.22773 21.05449 23.40097 20.43342 24.02205
## 5507101 22.15134 20.93131 23.37137 20.28547 24.01721
## 5508001 22.06537 20.80082 23.32993 20.13140 23.99935
## 5508901 21.97055 20.66381 23.27729 19.97206 23.96903
## 5509801 21.86760 20.52110 23.21411 19.80831 23.92690
## 5510701 21.75732 20.37353 23.14112 19.64100 23.87365
## 5511601 21.64050 20.22193 23.05908 19.47098 23.81003
## 5512501 21.51796 20.06711 22.96882 19.29908 23.73685
Observation: The 30 forecast data points for MFA_sol are listed above and can be seen visually in the ARIMA forecast graph.
Plot Forecast Using ARIMA Model for XTSM
# Plot the forecast object fcData2.
autoplot(fcData2)