Objective

Predict 30 Data Points

Using the RUB_sol and MFA_sol columns of the given data set, forecast the next 30 data points for each series.

Dataset Size
6096 observations of 7 variables

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
library(tseries)
## Warning: package 'tseries' was built under R version 3.3.3
library(xts)
## Warning: package 'xts' was built under R version 3.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.3.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library(forecast)
## Warning: package 'forecast' was built under R version 3.3.3
library(quantmod)
## Warning: package 'quantmod' was built under R version 3.3.3
## Loading required package: TTR
## Warning: package 'TTR' was built under R version 3.3.3
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.3.3
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(ggfortify)
## Warning: package 'ggfortify' was built under R version 3.3.3
## 
## Attaching package: 'ggfortify'
## The following object is masked from 'package:forecast':
## 
##     gglagplot


Reading File

# Load the raw observations; text columns are kept as character (not factor)
# so that the time15 column can be parsed into date-times explicitly.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
dfrdata <- read.csv("./data/xtsdata.csv", header = TRUE, stringsAsFactors = FALSE)
# Preview the first six rows to check the column names and value formats.
head(dfrdata)
##           time15 RUB_sol MFA_sol NFA_sol NFY_sol SFY_baro_air NFA_baro_air
## 1 4/30/2012 0:15  11.929  12.689   11.26    8.19         9.43       13.134
## 2 4/30/2012 0:30  11.879  12.627   11.28    8.18         9.25       12.925
## 3 4/30/2012 0:45  11.828  12.570   11.26    8.21         9.03       12.736
## 4 4/30/2012 1:00  11.779  12.511   11.23    8.22         8.82       12.605
## 5 4/30/2012 1:15  11.730  12.459   11.17    8.23         8.60       12.455
## 6 4/30/2012 1:30  11.682  12.397   11.15    8.24         8.42       12.335
cat("\nClass:\n")
## 
## Class:
# Show the container type returned by read.csv().
class(dfrdata)
## [1] "data.frame"
#nrow(dfrdata)

Because the data is held in a plain data frame, it must first be converted into a time series object.

Preparing Data

# The timestamps are read as character strings, so is.Date() reports FALSE.
is.Date(dfrdata$time15)
## [1] FALSE
cat("\n")
# Parse the timestamps as POSIXct. POSIXlt is a list-based class that is
# awkward to store in a data frame column, and POSIXct is the compact
# date-time representation normally used for data frame columns and indexes.
dfrdata$time15 <- as.POSIXct(dfrdata$time15, format = "%m/%d/%Y %H:%M")
# Fail early, with a clear message, if any timestamp did not match the format.
if (anyNA(dfrdata$time15)) {
  stop("time15 contains values that do not match the format %m/%d/%Y %H:%M",
       call. = FALSE)
}
# One single-column xts series per variable, both ordered by the same timestamps.
xtsRub <- xts(dfrdata$RUB_sol, order.by = dfrdata$time15)
xtsMfa <- xts(dfrdata$MFA_sol, order.by = dfrdata$time15)
colnames(xtsRub) <- "RUB_sol"
colnames(xtsMfa) <- "MFA_sol"
# Print the first rows of each xts object to confirm that the values are now
# indexed by parsed timestamps.
cat("Extended Time Series RUB_sol\n")
## Extended Time Series RUB_sol
head(xtsRub)
##                     RUB_sol
## 2012-04-30 00:15:00  11.929
## 2012-04-30 00:30:00  11.879
## 2012-04-30 00:45:00  11.828
## 2012-04-30 01:00:00  11.779
## 2012-04-30 01:15:00  11.730
## 2012-04-30 01:30:00  11.682
cat("\n")
cat("Extended Time Series MFA_sol\n")
## Extended Time Series MFA_sol
head(xtsMfa)
##                     MFA_sol
## 2012-04-30 00:15:00  12.689
## 2012-04-30 00:30:00  12.627
## 2012-04-30 00:45:00  12.570
## 2012-04-30 01:00:00  12.511
## 2012-04-30 01:15:00  12.459
## 2012-04-30 01:30:00  12.397


Exploratory Analysis RUB_sol

# Descriptive overview of the RUB_sol series: value distribution, time span,
# sampling interval and monthly aggregates.
cat("\n")
cat("\nSummary:\n")
## 
## Summary:
# Quartiles and mean of both the time index and the values.
summary(xtsRub)
##      Index                        RUB_sol      
##  Min.   :2012-04-30 00:15:00   Min.   : 9.489  
##  1st Qu.:2012-05-15 21:11:15   1st Qu.:13.906  
##  Median :2012-05-31 18:07:30   Median :16.043  
##  Mean   :2012-05-31 18:07:30   Mean   :15.956  
##  3rd Qu.:2012-06-16 15:03:45   3rd Qu.:17.971  
##  Max.   :2012-07-02 12:00:00   Max.   :22.439
cat("\nStart:\n")
## 
## Start:
# First timestamp in the series.
start(xtsRub)
## [1] "2012-04-30 00:15:00 IST"
cat("\nEnds:\n")
## 
## Ends:
# Last timestamp in the series.
end(xtsRub)
## [1] "2012-07-02 12:00:00 IST"
cat("\nFreq:\n")
## 
## Freq:
# The printed value equals 1/900; presumably one observation per 900 seconds
# (15 minutes) of the index - TODO confirm.
frequency(xtsRub)
## [1] 0.001111111
cat("\nIndex:\n")
## 
## Index:
head(index(xtsRub))
## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"
cat("\nPeriodicity:\n")
## 
## Periodicity:
# Sampling interval as estimated from the index.
periodicity(xtsRub)
## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00
cat("\nMonthly OHLC:\n")
## 
## Monthly OHLC:
# Collapse to one row per month: first, highest, lowest and last value.
to.monthly(xtsRub)
##          xtsRub.Open xtsRub.High xtsRub.Low xtsRub.Close
## Apr 2012      11.929      13.665     10.914       12.589
## May 2012      12.546      18.891      9.489       17.254
## Jun 2012      17.207      22.439     12.995       18.997
## Jul 2012      18.960      21.896     17.883       20.550
cat("\nMonthly Mean:\n")
## 
## Monthly Mean:
# Split the series by calendar month and average each piece; the list
# elements follow the month order shown above.
lapply(split(xtsRub,f="months"),FUN=mean)
## [[1]]
## [1] 12.27673
## 
## [[2]]
## [1] 14.09251
## 
## [[3]]
## [1] 17.82781
## 
## [[4]]
## [1] 19.42347


# Line plot of the full RUB_sol series in blue; the title and both axis
# labels are set in a single labs() call.
autoplot(xtsRub, ts.colour = "blue") +
  labs(title = "Times Series Plot", x = "Month", y = "RUB_sol")


Exploratory Analysis MFA_sol

# Descriptive overview of the MFA_sol series: value distribution, time span,
# sampling interval and monthly aggregates.
cat("\nSummary:\n")
## 
## Summary:
# Quartiles and mean of both the time index and the values.
summary(xtsMfa)
##      Index                        MFA_sol     
##  Min.   :2012-04-30 00:15:00   Min.   :10.54  
##  1st Qu.:2012-05-15 21:11:15   1st Qu.:13.30  
##  Median :2012-05-31 18:07:30   Median :15.09  
##  Mean   :2012-05-31 18:07:30   Mean   :14.92  
##  3rd Qu.:2012-06-16 15:03:45   3rd Qu.:16.45  
##  Max.   :2012-07-02 12:00:00   Max.   :19.71
cat("\nStart:\n")
## 
## Start:
# First timestamp in the series.
start(xtsMfa)
## [1] "2012-04-30 00:15:00 IST"
cat("\nEnds:\n")
## 
## Ends:
# Last timestamp in the series.
end(xtsMfa)
## [1] "2012-07-02 12:00:00 IST"
cat("\nFreq:\n")
## 
## Freq:
# The printed value equals 1/900; presumably one observation per 900 seconds
# (15 minutes) of the index - TODO confirm.
frequency(xtsMfa)
## [1] 0.001111111
cat("\nIndex:\n")
## 
## Index:
head(index(xtsMfa))
## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"
cat("\nPeriodicity:\n")
## 
## Periodicity:
# Sampling interval as estimated from the index.
periodicity(xtsMfa)
## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00
cat("\nMonthly OHLC:\n")
## 
## Monthly OHLC:
# Collapse to one row per month: first, highest, lowest and last value.
to.monthly(xtsMfa)
##          xtsMfa.Open xtsMfa.High xtsMfa.Low xtsMfa.Close
## Apr 2012      12.689      13.855     11.579       12.769
## May 2012      12.701      17.203     10.541       15.834
## Jun 2012      15.796      19.709     13.883       16.295
## Jul 2012      16.252      18.691     15.603       17.819
cat("\nMonthly Mean:\n")
## 
## Monthly Mean:
# Split the series by calendar month and average each piece; the list
# elements follow the month order shown above.
lapply(split(xtsMfa,f="months"),FUN=mean)
## [[1]]
## [1] 12.69096
## 
## [[2]]
## [1] 13.41241
## 
## [[3]]
## [1] 16.45846
## 
## [[4]]
## [1] 16.94893


# Line plot of the full MFA_sol series in blue; the title and both axis
# labels are set in a single labs() call.
autoplot(xtsMfa, ts.colour = "blue") +
  labs(title = "Times Series Plot", x = "Month", y = "MFA_sol")

Month-wise Segregation RUB_sol

# Build a plain data frame for the monthly box plot of RUB_sol: one row per
# observation holding its value, its timestamp label and its calendar month.
dfData <- as.data.frame(xtsRub)
colnames(dfData) <- "values"
# Take the month number from the series index instead of slicing characters
# out of the row names, so the result does not depend on the label layout.
monthNum <- as.integer(format(index(xtsRub), "%m"))
# Label the months through month.abb and keep only the months that occur, in
# calendar order. Assigning a fixed c("Apr","May","Jun","Jul") would mislabel
# the data (or fail) as soon as the file covered a different set of months.
dfData <- mutate(dfData,
                 dates = row.names(dfData),
                 months = factor(month.abb[monthNum],
                                 levels = month.abb[sort(unique(monthNum))]))
head(dfData,10)
##    values               dates months
## 1  11.929 2012-04-30 00:15:00    Apr
## 2  11.879 2012-04-30 00:30:00    Apr
## 3  11.828 2012-04-30 00:45:00    Apr
## 4  11.779 2012-04-30 01:00:00    Apr
## 5  11.730 2012-04-30 01:15:00    Apr
## 6  11.682 2012-04-30 01:30:00    Apr
## 7  11.635 2012-04-30 01:45:00    Apr
## 8  11.589 2012-04-30 02:00:00    Apr
## 9  11.545 2012-04-30 02:15:00    Apr
## 10 11.502 2012-04-30 02:30:00    Apr

Plot Monthly Distribution (Box Plot)

# Box plot of the values grouped by month (blue boxes, red outlier points),
# labelled for RUB_sol.
ggplot(dfData, aes(x = months, y = values)) +
  geom_boxplot(colour = "blue", outlier.colour = "red") +
  labs(title = "Monthly Box Plot", x = "Months", y = "RUB_sol")

Month-wise Segregation MFA_sol

# make data for box plot
dfData <- as.data.frame(xtsMfa)
colnames(dfData) <- c("values")
dfData <- mutate(dfData, 
                 dates=row.names(dfData),
                 months=as.numeric(substring(row.names(dfData),6,7)))
dfData$months <- as.factor(dfData$months)
levels(dfData$months) <- c("Apr","May","Jun","Jul")
head(dfData,10)
##    values               dates months
## 1  12.689 2012-04-30 00:15:00    Apr
## 2  12.627 2012-04-30 00:30:00    Apr
## 3  12.570 2012-04-30 00:45:00    Apr
## 4  12.511 2012-04-30 01:00:00    Apr
## 5  12.459 2012-04-30 01:15:00    Apr
## 6  12.397 2012-04-30 01:30:00    Apr
## 7  12.353 2012-04-30 01:45:00    Apr
## 8  12.296 2012-04-30 02:00:00    Apr
## 9  12.250 2012-04-30 02:15:00    Apr
## 10 12.207 2012-04-30 02:30:00    Apr

Plot Monthly Distribution (Box Plot)

# Box plot of the values grouped by month (blue boxes, red outlier points),
# labelled for MFA_sol.
ggplot(dfData, aes(x = months, y = values)) +
  geom_boxplot(colour = "blue", outlier.colour = "red") +
  labs(title = "Monthly Box Plot", x = "Months", y = "MFA_sol")


ADF Test

# Augmented Dickey-Fuller Test
# Tests each series against the alternative hypothesis that it is stationary.
# k=0 requests a lag order of zero (see "Lag order = 0" in the output).
# NOTE(review): for strongly autocorrelated 15-minute data a larger lag order
# may be more appropriate - confirm before relying on these p-values.
adf.test(xtsRub, alternative="stationary", k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  xtsRub
## Dickey-Fuller = -1.9731, Lag order = 0, p-value = 0.5898
## alternative hypothesis: stationary
adf.test(xtsMfa, alternative="stationary", k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  xtsMfa
## Dickey-Fuller = -2.0628, Lag order = 0, p-value = 0.5517
## alternative hypothesis: stationary

Both p-values are greater than 0.05, so the null hypothesis of a unit root cannot be rejected.
Therefore both RUB_sol and MFA_sol are treated as non-stationary.

Plot ACF

# Compute each autocorrelation function without drawing the base-graphics
# plot, then render the stored result with autoplot().
acfRub <- acf(xtsRub, plot = FALSE)
autoplot(acfRub)

acfMfa <- acf(xtsMfa, plot = FALSE)
autoplot(acfMfa)


Plot PACF

# Compute each partial autocorrelation function without drawing the
# base-graphics plot, then render the stored result with autoplot().
pacfRub <- pacf(xtsRub, plot = FALSE)
autoplot(pacfRub)

pacfMfa <- pacf(xtsMfa, plot = FALSE)
autoplot(pacfMfa)

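As the autocorrelations of both series stay above zero at every lag, which is consistent with the non-stationarity found by the ADF test, we fit an ARIMA model (which includes differencing) to each series.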

Predicting the next 30 Data Points at 99% Level of Confidence


Make ARIMA Model RUB_sol

# Let auto.arima() select the ARIMA order for RUB_sol; printing the fit shows
# the chosen order, the coefficients with standard errors and the
# information criteria.
# NOTE(review): the selected model has no seasonal part; presumably the xts
# object carries no seasonal period (e.g. 96 observations per day) - confirm
# whether daily seasonality should be modelled.
armModel <- auto.arima(xtsRub)
armModel
## Series: xtsRub 
## ARIMA(5,1,1)                    
## 
## Coefficients:
##          ar1      ar2     ar3      ar4     ar5      ma1
##       1.9571  -1.0185  0.1645  -0.1499  0.0409  -0.9766
## s.e.  0.0131   0.0282  0.0309   0.0282  0.0130   0.0029
## 
## sigma^2 estimated as 0.0003703:  log likelihood=15431.69
## AIC=-30849.37   AICc=-30849.36   BIC=-30802.37


Forecast Using ARIMA Model

cat("\nNext 30 Data Points of RUB_sol\n")
## 
## Next 30 Data Points of RUB_sol
# Forecast 30 steps ahead from the fitted model with a 99% interval.
fcData <- forecast(armModel,h=30,level = 99)
# NOTE(review): the row labels of the printed table are numbers, not
# timestamps; they increase by 900 per step, so they appear to be offsets in
# seconds (15 minutes apart) - confirm.
fcData
##         Point Forecast    Lo 99    Hi 99
## 5486401       20.75225 20.70268 20.80181
## 5487301       20.94708 20.83710 21.05705
## 5488201       21.13238 20.95213 21.31262
## 5489101       21.30777 21.04672 21.56881
## 5490001       21.47257 21.12200 21.82313
## 5490901       21.62601 21.17885 22.07317
## 5491801       21.76750 21.21776 22.31724
## 5492701       21.89651 21.23922 22.55381
## 5493601       22.01261 21.24371 22.78151
## 5494501       22.11543 21.23175 22.99910
## 5495401       22.20469 21.20385 23.20552
## 5496301       22.28020 21.16058 23.39982
## 5497201       22.34186 21.10254 23.58118
## 5498101       22.38964 21.03038 23.74889
## 5499001       22.42358 20.94477 23.90239
## 5499901       22.44382 20.84645 24.04120
## 5500801       22.45057 20.73616 24.16498
## 5501701       22.44410 20.61472 24.27348
## 5502601       22.42475 20.48293 24.36658
## 5503501       22.39294 20.34165 24.44424
## 5504401       22.34914 20.19174 24.50654
## 5505301       22.29388 20.03410 24.55366
## 5506201       22.22773 19.86961 24.58586
## 5507101       22.15134 19.69917 24.60351
## 5508001       22.06537 19.52370 24.60705
## 5508901       21.97055 19.34409 24.59700
## 5509801       21.86760 19.16123 24.57398
## 5510701       21.75732 18.97600 24.53865
## 5511601       21.64050 18.78926 24.49175
## 5512501       21.51796 18.60185 24.43408


# Draw the RUB_sol forecast object with a green series line, then add the title.
rubForecastPlot <- autoplot(fcData, ts.colour = "green")
rubForecastPlot + labs(title = "RUB_sol Prediction (30 Data Points)")


Make ARIMA Model MFA_sol

# Let auto.arima() select the ARIMA order for MFA_sol; printing the fit shows
# the chosen order, the coefficients with standard errors and the
# information criteria.
# NOTE(review): the warning in the output shows that most coefficient
# standard errors could not be computed (NaN). This may indicate a poorly
# conditioned or over-parameterised fit - verify the model before relying on
# its prediction intervals.
armModel2 <- auto.arima(xtsMfa)
armModel2
## Series: xtsMfa 
## ARIMA(4,1,4)                    
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##          ar1     ar2      ar3     ar4     ma1      ma2     ma3     ma4
##       0.4856  0.7682  -0.3657  -0.008  0.1278  -0.4095  0.2634  0.1029
## s.e.     NaN     NaN      NaN     NaN     NaN   0.0508     NaN     NaN
## 
## sigma^2 estimated as 0.0009886:  log likelihood=12441.01
## AIC=-24864.02   AICc=-24863.99   BIC=-24803.58


Forecast Using ARIMA Model

cat("\nNext 30 Data Points of MFA_sol\n")
## 
## Next 30 Data Points of MFA_sol
# Forecast 30 steps ahead from the fitted model with a 99% interval.
fcData2 <- forecast(armModel2,h=30,level = 99)
# NOTE(review): the row labels of the printed table are numbers, not
# timestamps; they increase by 900 per step, so they appear to be offsets in
# seconds (15 minutes apart) - confirm.
fcData2
##         Point Forecast    Lo 99    Hi 99
## 5486401       17.89291 17.81192 17.97390
## 5487301       17.96065 17.80691 18.11438
## 5488201       18.02405 17.78440 18.26370
## 5489101       18.08410 17.74525 18.42295
## 5490001       18.13661 17.68604 18.58718
## 5490901       18.18452 17.61532 18.75372
## 5491801       18.22566 17.53236 18.91897
## 5492701       18.26277 17.44336 19.08218
## 5493601       18.29446 17.34759 19.24132
## 5494501       18.32293 17.24921 19.39664
## 5495401       18.34720 17.14743 19.54697
## 5496301       18.36898 17.04505 19.69291
## 5497201       18.38754 16.94132 19.83376
## 5498101       18.40418 16.83816 19.97020
## 5499001       18.41836 16.73490 20.10182
## 5499901       18.43107 16.63288 20.22927
## 5500801       18.44191 16.53152 20.35229
## 5501701       18.45162 16.43175 20.47148
## 5502601       18.45990 16.33310 20.58669
## 5503501       18.46731 16.23620 20.69842
## 5504401       18.47364 16.14067 20.80660
## 5505301       18.47930 16.04695 20.91165
## 5506201       18.48413 15.95471 21.01355
## 5507101       18.48846 15.86427 21.11264
## 5508001       18.49215 15.77535 21.20894
## 5508901       18.49545 15.68817 21.30273
## 5509801       18.49827 15.60251 21.39404
## 5510701       18.50079 15.51849 21.48310
## 5511601       18.50295 15.43594 21.56995
## 5512501       18.50487 15.35495 21.65480


# Draw the MFA_sol forecast object with a green series line, then add the title.
mfaForecastPlot <- autoplot(fcData2, ts.colour = "green")
mfaForecastPlot + labs(title = "MFA_sol Prediction (30 Data Points)")