Assignment

Problem Definition :

The objective is to predict 30 data points for the columns RUB_sol and MFA_sol.

Adding Libraries

library(tidyr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following object is masked from 'package:base':
## 
##     date

library(tseries)
library(xts)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

library(forecast)
library(quantmod)

## Loading required package: TTR

## Version 0.4-0 included new data defaults. See ?getSymbols.

library(ggfortify)

## Loading required package: ggplot2

## 
## Attaching package: 'ggfortify'

## The following object is masked from 'package:forecast':
## 
##     gglagplot

library(stringr)

Reading and Creating Extensible Time Series From a csv

# Load the raw 15-minute sensor readings. The logical arguments are spelled out
# as TRUE/FALSE because T and F are ordinary variables that can be reassigned.
dfrxtsSampleData <- read.csv("F:/Management/Trimester 4/Core/3. Machine Learning/Practical/Data/xtsdata.csv", header = TRUE, stringsAsFactors = FALSE)
# List the column names to confirm what was read.
names(dfrxtsSampleData)

## [1] "time15"       "RUB_sol"      "MFA_sol"      "NFA_sol"     
## [5] "NFY_sol"      "SFY_baro_air" "NFA_baro_air"

# Parse the timestamp strings ("month/day/year hour:minute") as POSIXct.
# POSIXct is a plain numeric vector and is the recommended date-time class for
# data-frame columns; POSIXlt is list-based and is not handled reliably there.
dfrxtsSampleData$time15 <- as.POSIXct(dfrxtsSampleData$time15, format = "%m/%d/%Y %H:%M")
# Keep only the timestamp and the RUB_sol readings (select() already returns a data frame).
dfrxtsSampleRub <- select(dfrxtsSampleData, time15, RUB_sol)
# Build the time series ordered by timestamp. No `frequency` is passed:
# frequency() on the resulting object is derived from the index spacing
# (it prints 0.001111111 for this data), so a supplied value has no effect.
xtsSampleRub <- xts(dfrxtsSampleRub$RUB_sol, order.by = dfrxtsSampleRub$time15)
head(xtsSampleRub)

##                       [,1]
## 2012-04-30 00:15:00 11.929
## 2012-04-30 00:30:00 11.879
## 2012-04-30 00:45:00 11.828
## 2012-04-30 01:00:00 11.779
## 2012-04-30 01:15:00 11.730
## 2012-04-30 01:30:00 11.682

## Extensible time series (xts) for MFA_sol
# Keep only the timestamp and the MFA_sol readings (select() already returns a data frame).
dfrxtsSampleMfa <- select(dfrxtsSampleData, time15, MFA_sol)
# Build the time series ordered by timestamp. No `frequency` is passed:
# frequency() on the resulting object is derived from the index spacing
# (it prints 0.001111111 for this data), so a supplied value has no effect.
xtsSampleMfa <- xts(dfrxtsSampleMfa$MFA_sol, order.by = dfrxtsSampleMfa$time15)
head(xtsSampleMfa)

##                       [,1]
## 2012-04-30 00:15:00 12.689
## 2012-04-30 00:30:00 12.627
## 2012-04-30 00:45:00 12.570
## 2012-04-30 01:00:00 12.511
## 2012-04-30 01:15:00 12.459
## 2012-04-30 01:30:00 12.397

xtsSampleRub Info

cat("\nSummary:\n")

## 
## Summary:

summary(xtsSampleRub)

##      Index                      xtsSampleRub   
##  Min.   :2012-04-30 00:15:00   Min.   : 9.489  
##  1st Qu.:2012-05-15 21:11:15   1st Qu.:13.906  
##  Median :2012-05-31 18:07:30   Median :16.043  
##  Mean   :2012-05-31 18:07:30   Mean   :15.956  
##  3rd Qu.:2012-06-16 15:03:45   3rd Qu.:17.971  
##  Max.   :2012-07-02 12:00:00   Max.   :22.439

cat("\nStart:\n")

## 
## Start:

start(xtsSampleRub)

## [1] "2012-04-30 00:15:00 IST"

cat("\nEnds:\n")

## 
## Ends:

end(xtsSampleRub)

## [1] "2012-07-02 12:00:00 IST"

cat("\nFreq:\n")

## 
## Freq:

# Print the frequency of the series. The value shown below (0.001111111) equals 1/900,
# i.e. one observation per 900 seconds (15 minutes); it is not a seasonal period.
frequency(xtsSampleRub)

## [1] 0.001111111

cat("\nIndex:\n")

## 
## Index:

head(index(xtsSampleRub))

## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"

cat("\nPeriodicity:\n")

## 
## Periodicity:

periodicity(xtsSampleRub)

## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00

cat("\nYearly OHLC:\n")

## 
## Yearly OHLC:

to.yearly(xtsSampleRub)

##            xtsSampleRub.Open xtsSampleRub.High xtsSampleRub.Low
## 2012-07-02            11.929            22.439            9.489
##            xtsSampleRub.Close
## 2012-07-02              20.55

cat("\nYearly Mean:\n")

## 
## Yearly Mean:

lapply(split(xtsSampleRub,f="years"),FUN=mean)

## [[1]]
## [1] 15.95573

cat("\nQuarterly OHLC:\n")

## 
## Quarterly OHLC:

head(to.quarterly(xtsSampleRub))

##         xtsSampleRub.Open xtsSampleRub.High xtsSampleRub.Low
## 2012 Q2            11.929            22.439            9.489
## 2012 Q3            18.960            21.896           17.883
##         xtsSampleRub.Close
## 2012 Q2             18.997
## 2012 Q3             20.550

cat("\nQuarterly Mean:\n")

## 
## Quarterly Mean:

head(lapply(split(xtsSampleRub,f="quarters"),FUN=mean))

## [[1]]
## [1] 15.87123
## 
## [[2]]
## [1] 19.42347

cat("\nMonthly OHLC:\n")

## 
## Monthly OHLC:

head(to.monthly(xtsSampleRub))

##          xtsSampleRub.Open xtsSampleRub.High xtsSampleRub.Low
## Apr 2012            11.929            13.665           10.914
## May 2012            12.546            18.891            9.489
## Jun 2012            17.207            22.439           12.995
## Jul 2012            18.960            21.896           17.883
##          xtsSampleRub.Close
## Apr 2012             12.589
## May 2012             17.254
## Jun 2012             18.997
## Jul 2012             20.550

cat("\nMonthly Mean:\n")

## 
## Monthly Mean:

head(lapply(split(xtsSampleRub,f="months"),FUN=mean))

## [[1]]
## [1] 12.27673
## 
## [[2]]
## [1] 14.09251
## 
## [[3]]
## [1] 17.82781
## 
## [[4]]
## [1] 19.42347

xtsSampleMfa Info

cat("\nSummary:\n")

## 
## Summary:

summary(xtsSampleMfa)

##      Index                      xtsSampleMfa  
##  Min.   :2012-04-30 00:15:00   Min.   :10.54  
##  1st Qu.:2012-05-15 21:11:15   1st Qu.:13.30  
##  Median :2012-05-31 18:07:30   Median :15.09  
##  Mean   :2012-05-31 18:07:30   Mean   :14.92  
##  3rd Qu.:2012-06-16 15:03:45   3rd Qu.:16.45  
##  Max.   :2012-07-02 12:00:00   Max.   :19.71

cat("\nStart:\n")

## 
## Start:

start(xtsSampleMfa)

## [1] "2012-04-30 00:15:00 IST"

cat("\nEnds:\n")

## 
## Ends:

end(xtsSampleMfa)

## [1] "2012-07-02 12:00:00 IST"

cat("\nFreq:\n")

## 
## Freq:

# Print the frequency of the series. The value shown below (0.001111111) equals 1/900,
# i.e. one observation per 900 seconds (15 minutes); it is not a seasonal period.
frequency(xtsSampleMfa)

## [1] 0.001111111

cat("\nIndex:\n")

## 
## Index:

head(index(xtsSampleMfa))

## [1] "2012-04-30 00:15:00 IST" "2012-04-30 00:30:00 IST"
## [3] "2012-04-30 00:45:00 IST" "2012-04-30 01:00:00 IST"
## [5] "2012-04-30 01:15:00 IST" "2012-04-30 01:30:00 IST"

cat("\nPeriodicity:\n")

## 
## Periodicity:

periodicity(xtsSampleMfa)

## 15 minute periodicity from 2012-04-30 00:15:00 to 2012-07-02 12:00:00

cat("\nYearly OHLC:\n")

## 
## Yearly OHLC:

to.yearly(xtsSampleMfa)

##            xtsSampleMfa.Open xtsSampleMfa.High xtsSampleMfa.Low
## 2012-07-02            12.689            19.709           10.541
##            xtsSampleMfa.Close
## 2012-07-02             17.819

cat("\nYearly Mean:\n")

## 
## Yearly Mean:

lapply(split(xtsSampleMfa,f="years"),FUN=mean)

## [[1]]
## [1] 14.92437

cat("\nQuarterly OHLC:\n")

## 
## Quarterly OHLC:

head(to.quarterly(xtsSampleMfa))

##         xtsSampleMfa.Open xtsSampleMfa.High xtsSampleMfa.Low
## 2012 Q2            12.689            19.709           10.541
## 2012 Q3            16.252            18.691           15.603
##         xtsSampleMfa.Close
## 2012 Q2             16.295
## 2012 Q3             17.819

cat("\nQuarterly Mean:\n")

## 
## Quarterly Mean:

head(lapply(split(xtsSampleMfa,f="quarters"),FUN=mean))

## [[1]]
## [1] 14.87504
## 
## [[2]]
## [1] 16.94893

cat("\nMonthly OHLC:\n")

## 
## Monthly OHLC:

head(to.monthly(xtsSampleMfa))

##          xtsSampleMfa.Open xtsSampleMfa.High xtsSampleMfa.Low
## Apr 2012            12.689            13.855           11.579
## May 2012            12.701            17.203           10.541
## Jun 2012            15.796            19.709           13.883
## Jul 2012            16.252            18.691           15.603
##          xtsSampleMfa.Close
## Apr 2012             12.769
## May 2012             15.834
## Jun 2012             16.295
## Jul 2012             17.819

cat("\nMonthly Mean:\n")

## 
## Monthly Mean:

head(lapply(split(xtsSampleMfa,f="months"),FUN=mean))

## [[1]]
## [1] 12.69096
## 
## [[2]]
## [1] 13.41241
## 
## [[3]]
## [1] 16.45846
## 
## [[4]]
## [1] 16.94893

Plot xtsSampleRub

# Base-graphics line plot of the RUB series
plot(xtsSampleRub)

# Same series drawn through autoplot(); title and axis labels are set in one labs() call
autoplot(xtsSampleRub, ts.colour = "blue") +
    labs(title = "Times Series Plot", x = "Months", y = "Sample RUB")

Plot xtsSampleMfa

# Base-graphics line plot of the MFA series
plot(xtsSampleMfa)

# Same series drawn through autoplot(); title and axis labels are set in one labs() call
autoplot(xtsSampleMfa, ts.colour = "blue") +
    labs(title = "Times Series Plot", x = "Months", y = "Sample MFA")

Observation:
1. Neither the RUB_sol nor the MFA_sol series looks stationary: both drift upward over the observed period.

ADF Test for xtsSampleRub

# Augmented Dickey-Fuller test on the RUB series; the alternative hypothesis is stationarity,
# so a large p-value means a unit root cannot be rejected.
# k=0 sets the lag order to 0 (reported as "Lag order = 0" in the output).
# NOTE(review): with lag order 0 no lagged differences enter the test regression; for
# strongly autocorrelated 15-minute data consider leaving k at its default -- confirm.
adf.test(xtsSampleRub, alternative="stationary", k=0)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  xtsSampleRub
## Dickey-Fuller = -1.9731, Lag order = 0, p-value = 0.5898
## alternative hypothesis: stationary

Observation:
1. The ADF test is performed to check whether the xts series is stationary.
2. The p-value is greater than 0.05, so the null hypothesis of a unit root cannot be rejected; this suggests that the series is not stationary.
3. For this particular assignment, and for learning purposes, we nevertheless proceed as if the xts data were stationary.

ADF Test for xtsSampleMfa

# Augmented Dickey-Fuller test on the MFA series; the alternative hypothesis is stationarity,
# so a large p-value means a unit root cannot be rejected.
# k=0 sets the lag order to 0 (reported as "Lag order = 0" in the output).
# NOTE(review): with lag order 0 no lagged differences enter the test regression; for
# strongly autocorrelated 15-minute data consider leaving k at its default -- confirm.
adf.test(xtsSampleMfa, alternative="stationary", k=0)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  xtsSampleMfa
## Dickey-Fuller = -2.0628, Lag order = 0, p-value = 0.5517
## alternative hypothesis: stationary

Plot ACF for xtsSampleRub

# Autocorrelation function with base graphics: convert the xts object to a ts
# object, then plot the ACF of the log-transformed RUB series.
tsSampleRubData <- as.ts(xtsSampleRub)
acf(log(tsSampleRubData))

# Autocorrelation function via autoplot(): acf() is computed without drawing (plot = FALSE)
# and the result is handed to autoplot(). Note this one uses the raw series, not its log.
autoplot(acf(xtsSampleRub, plot = FALSE))

Plot ACF for xtsSampleMfa

# Autocorrelation function with base graphics: convert the xts object to a ts
# object, then plot the ACF of the log-transformed MFA series.
tsSampleMfaData <- as.ts(xtsSampleMfa)
acf(log(tsSampleMfaData))

# Autocorrelation function via autoplot(): acf() is computed without drawing (plot = FALSE)
# and the result is handed to autoplot(). Note this one uses the raw series, not its log.
autoplot(acf(xtsSampleMfa, plot = FALSE))

Observation:
1. The autocorrelations stay above 0 at the plotted lags for both xts series.

Plot PACF for xtsSampleRub

# Partial autocorrelation function with base graphics, computed on the
# differenced log RUB series. This section is about the PACF, so pacf() is
# called here; the previous acf() call drew an ordinary autocorrelation plot.
tsSampleRubData <- as.ts(xtsSampleRub)
pacf(diff(log(tsSampleRubData)))

# Partial autocorrelation of the raw series: computed without drawing
# (plot = FALSE) and rendered through autoplot().
autoplot(pacf(xtsSampleRub, plot = FALSE))

Plot PACF for xtsSampleMfa

# Partial autocorrelation function with base graphics, computed on the
# differenced log MFA series. This section is about the PACF, so pacf() is
# called here; the previous acf() call drew an ordinary autocorrelation plot.
tsSampleMfaData <- as.ts(xtsSampleMfa)
pacf(diff(log(tsSampleMfaData)))

# Partial autocorrelation of the raw series: computed without drawing
# (plot = FALSE) and rendered through autoplot().
autoplot(pacf(xtsSampleMfa, plot = FALSE))

Observation:
1. For both xts series, some of the partial autocorrelations are above 0 and the rest are below 0.

Make ARIMA Model for xtsSampleRub

# Fit an ARIMA model to the RUB series, letting auto.arima() choose the model orders,
# then print the selected specification with its coefficients and fit statistics.
armRubModel <- auto.arima(xtsSampleRub)
armRubModel

## Series: xtsSampleRub 
## ARIMA(5,1,1)                    
## 
## Coefficients:
##          ar1      ar2     ar3      ar4     ar5      ma1
##       1.9571  -1.0185  0.1645  -0.1499  0.0409  -0.9766
## s.e.  0.0131   0.0282  0.0309   0.0282  0.0130   0.0029
## 
## sigma^2 estimated as 0.0003703:  log likelihood=15431.69
## AIC=-30849.37   AICc=-30849.36   BIC=-30802.37

Observation:
1. auto.arima() searches over candidate ARIMA orders and returns the best-fitting model for the xts series; here it selected ARIMA(5,1,1).

Make ARIMA Model for xtsSampleMfa

# Fit an ARIMA model to the MFA series, letting auto.arima() choose the model orders,
# then print the selected specification with its coefficients and fit statistics.
# NOTE(review): the printed output carries a "NaNs produced" warning and NaN standard
# errors for most coefficients, which may indicate an over-parameterised model -- confirm
# before relying on these estimates.
armMfaModel <- auto.arima(xtsSampleMfa)
armMfaModel

## Series: xtsSampleMfa 
## ARIMA(4,1,4)                    
## 
## Coefficients:

## Warning in sqrt(diag(x$var.coef)): NaNs produced

##          ar1     ar2      ar3     ar4     ma1      ma2     ma3     ma4
##       0.4856  0.7682  -0.3657  -0.008  0.1278  -0.4095  0.2634  0.1029
## s.e.     NaN     NaN      NaN     NaN     NaN   0.0508     NaN     NaN
## 
## sigma^2 estimated as 0.0009886:  log likelihood=12441.01
## AIC=-24864.02   AICc=-24863.99   BIC=-24803.58

Observation:
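1. auto.arima() searches over candidate ARIMA orders and returns the best-fitting model for the xts series; here it selected ARIMA(4,1,4), although most of its standard errors could not be computed (NaN).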

Forecast Rub column values Using ARIMA Model

# Forecast the next 30 observations (h=30) from the fitted RUB ARIMA model and print the
# point forecasts together with the 80% and 95% interval bounds.
# NOTE(review): the row labels in the printed table (5486401, 5487301, ...) advance by 900,
# so they appear to be a seconds-based index rather than timestamps -- confirm before
# mapping forecasts to dates.
fcRubData <- forecast(armRubModel,h=30)
fcRubData

##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 5486401       20.75225 20.72759 20.77691 20.71453 20.78996
## 5487301       20.94708 20.89236 21.00179 20.86340 21.03075
## 5488201       21.13238 21.04270 21.22205 20.99523 21.26953
## 5489101       21.30777 21.17789 21.43764 21.10914 21.50640
## 5490001       21.47257 21.29815 21.64698 21.20582 21.73931
## 5490901       21.62601 21.40354 21.84849 21.28577 21.96626
## 5491801       21.76750 21.49399 22.04101 21.34920 22.18580
## 5492701       21.89651 21.56949 22.22354 21.39637 22.39666
## 5493601       22.01261 21.63006 22.39516 21.42755 22.59767
## 5494501       22.11543 21.67577 22.55508 21.44303 22.78782
## 5495401       22.20469 21.70674 22.70263 21.44315 22.96623
## 5496301       22.28020 21.72316 22.83724 21.42828 23.13212
## 5497201       22.34186 21.72526 22.95846 21.39886 23.28486
## 5498101       22.38964 21.71336 23.06591 21.35537 23.42390
## 5499001       22.42358 21.68783 23.15933 21.29835 23.54882
## 5499901       22.44382 21.64908 23.23857 21.22837 23.65928
## 5500801       22.45057 21.59760 23.30354 21.14607 23.75508
## 5501701       22.44410 21.53393 23.35427 21.05211 23.83609
## 5502601       22.42475 21.45864 23.39087 20.94721 23.90230
## 5503501       22.39294 21.37236 23.41352 20.83210 23.95378
## 5504401       22.34914 21.27577 23.42251 20.70756 23.99072
## 5505301       22.29388 21.16957 23.41818 20.57440 24.01336
## 5506201       22.22773 21.05449 23.40097 20.43342 24.02205
## 5507101       22.15134 20.93131 23.37137 20.28547 24.01721
## 5508001       22.06537 20.80082 23.32993 20.13140 23.99935
## 5508901       21.97055 20.66381 23.27729 19.97206 23.96903
## 5509801       21.86760 20.52110 23.21411 19.80831 23.92690
## 5510701       21.75732 20.37353 23.14112 19.64100 23.87365
## 5511601       21.64050 20.22193 23.05908 19.47098 23.81003
## 5512501       21.51796 20.06711 22.96882 19.29908 23.73685

Forecast Mfa column values Using ARIMA Model

# Forecast the next 30 observations (h=30) from the fitted MFA ARIMA model and print the
# point forecasts together with the 80% and 95% interval bounds.
# NOTE(review): the row labels in the printed table (5486401, 5487301, ...) advance by 900,
# so they appear to be a seconds-based index rather than timestamps -- confirm before
# mapping forecasts to dates.
fcMfaData <- forecast(armMfaModel,h=30)
fcMfaData

##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 5486401       17.89291 17.85261 17.93320 17.83128 17.95453
## 5487301       17.96065 17.88416 18.03713 17.84367 18.07762
## 5488201       18.02405 17.90482 18.14328 17.84170 18.20640
## 5489101       18.08410 17.91551 18.25269 17.82627 18.34193
## 5490001       18.13661 17.91244 18.36078 17.79377 18.47945
## 5490901       18.18452 17.90132 18.46771 17.75141 18.61763
## 5491801       18.22566 17.88072 18.57060 17.69812 18.75320
## 5492701       18.26277 17.85509 18.67045 17.63927 18.88626
## 5493601       18.29446 17.82336 18.76555 17.57398 19.01493
## 5494501       18.32293 17.78872 18.85713 17.50593 19.13992
## 5495401       18.34720 17.75028 18.94412 17.43429 19.26012
## 5496301       18.36898 17.71028 19.02767 17.36159 19.37636
## 5497201       18.38754 17.66800 19.10708 17.28710 19.48798
## 5498101       18.40418 17.62504 19.18332 17.21258 19.59577
## 5499001       18.41836 17.58079 19.25593 17.13741 19.69931
## 5499901       18.43107 17.53642 19.32573 17.06282 19.79933
## 5500801       18.44191 17.49144 19.39238 16.98829 19.89553
## 5501701       18.45162 17.44667 19.45656 16.91469 19.98854
## 5502601       18.45990 17.40175 19.51804 16.84160 20.07819
## 5503501       18.46731 17.35727 19.57736 16.76965 20.16498
## 5504401       18.47364 17.31292 19.63436 16.69847 20.24880
## 5505301       18.47930 17.26913 19.68947 16.62851 20.33009
## 5506201       18.48413 17.22567 19.74259 16.55948 20.40878
## 5507101       18.48846 17.18285 19.79407 16.49170 20.48522
## 5508001       18.49215 17.14046 19.84383 16.42492 20.55937
## 5508901       18.49545 17.09875 19.89216 16.35938 20.63153
## 5509801       18.49827 17.05754 19.93900 16.29487 20.70168
## 5510701       18.50079 17.01701 19.98458 16.23154 20.77005
## 5511601       18.50295 16.97702 20.02887 16.16925 20.83665
## 5512501       18.50487 16.93770 20.07205 16.10808 20.90167

Plot Rub Forecast Using ARIMA Model

autoplot(fcRubData)

Observation:
1. The next 30 data points in the Rub column were plotted using ARIMA model

Plot Mfa Forecast Using ARIMA Model

autoplot(fcMfaData)

Observation:
1. The next 30 data points in the Mfa column were plotted using ARIMA model

Assignment_4

Eklavya Attar

4 August 2017