#install.packages(c("quantmod", "tseries", "timeSeries", "forecast", "xts"))
Today, we learned about ARIMA (autoregressive integrated moving average) and how to fit their models for forecasting.
library(quantmod)
## Warning: package 'quantmod' was built under R version 3.4.4
## Loading required package: xts
## Warning: package 'xts' was built under R version 3.4.4
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.4.4
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
library(tseries)
## Warning: package 'tseries' was built under R version 3.4.4
library(timeSeries)
## Warning: package 'timeSeries' was built under R version 3.4.4
## Loading required package: timeDate
## Warning: package 'timeDate' was built under R version 3.4.4
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
##
## time<-
library(forecast)
## Warning: package 'forecast' was built under R version 3.4.4
library(xts)
The first part of understanding ARIMA models is the first two letters (AR). AR means autoregressive. To plot a autoregressive model, use the arima.sim command in this fashion. Only use the first part of the three part order call. Only use (1,0,0).
par(mfrow=c(2,1))
plot(arima.sim(list(order=c(1,0,0),ar=.9) , n=200), ylab = "y", main = (expression(AR(1) ~~~ phi == +.9)))
abline(0,0)
plot(arima.sim(list(order=c(1,0,0),ar= -.9) , n=200), ylab = "y", main = (expression(AR(1) ~~~ phi == -.9)))
abline(0,0)
The 1 in the call is the order number. The AR is 0.9 with n=200 observations. We see more volatility in a negative -0.9 AR model because there are more swapping correlations.
set.seed(2115)
fakedata <- arima.sim(list(order=c(1,0,0),ar=.9) , n=200)
plot(fakedata)
mod1 <- auto.arima(fakedata)
mod1
## Series: fakedata
## ARIMA(1,0,0) with zero mean
##
## Coefficients:
## ar1
## 0.9266
## s.e. 0.0285
##
## sigma^2 estimated as 1.004: log likelihood=-284.66
## AIC=573.32 AICc=573.38 BIC=579.92
auto.arima fits the model for us, finding the point estimate of the first part of the order call. (AR,0,0)
Our AR order is 0.9266, so if we wanted to simulate this model, we would use (0.9266,0,0). The standard error is 0.0285.
To forecast use the forecast() function including a model and the number of units of time that you would like to forecast out.
mycast1 <- forecast(mod1, h=1)
plot(mycast1)
Forecasting out 1 day, you can see the point prediction.
plot(forecast(mod1, h=10))
When you forecast out further, you can see the interval, which increases in range the farther out you forecast.
par(mfrow = c(2,1))
plot(arima.sim(list(order=c(0,0,1), ma=.9), n = 200) , ylab = "y", main = (expression(MA(1) ~~~ theta == +.9)))
plot(arima.sim(list(order=c(0,0,1), ma=-.9), n = 200) , ylab = "y", main = (expression(MA(1) ~~~ theta == -.9)))
Now we are setting the value of the moving average component of the ARIMA model (MA). As you can see above, the negative MA value also has more volatility.
# Let's fit a model using simulated MA(1) data
set.seed(2.718)
ma.data <- arima.sim(list(order=c(0,0,1), ma=.9), n = 200)
mod2 <- auto.arima(ma.data)
mod2
## Series: ma.data
## ARIMA(1,0,2) with zero mean
##
## Coefficients:
## ar1 ma1 ma2
## -0.8354 1.723 0.7496
## s.e. 0.2317 0.237 0.2105
##
## sigma^2 estimated as 1.143: log likelihood=-296.35
## AIC=600.71 AICc=600.91 BIC=613.9
The third number is the q, the order of the moving average. Now, we can call an ARMA model, using the p and q terms, but we are missing the I from ARIMA.
Forecasting is the exact same for every ARIMA model. Once the model is fit, all you have to do it specify the forecast length.
plot(forecast(mod2, h=1)) # 1 day into the future
forecast(mod2, h=1)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 201 0.3159447 -1.054301 1.686191 -1.779666 2.411555
plot(forecast(mod2, h=10)) # 10 days into the future
set.seed(1234)
arma.data <- arima.sim(list(order=c(1,0,1), ma=.9, ar = .9), n = 200)
mod3 <- auto.arima(arma.data)
mod3
## Series: arma.data
## ARIMA(4,0,4) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 ma4
## 1.1663 -0.5789 0.8841 -0.5949 0.7172 0.0696 -0.4784 -0.6420
## s.e. 0.3502 0.5705 0.3427 0.1475 0.3347 0.2136 0.2604 0.1482
## mean
## 1.989
## s.e. 0.372
##
## sigma^2 estimated as 0.9662: log likelihood=-278.04
## AIC=576.09 AICc=577.25 BIC=609.07
Using the auto.arima command, we can create an ARIMA model only using the ARMA variables by leaving the middle number in the parenthesis as zero.
plot(forecast(mod3, h=1))
forecast(mod3, h=1)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 201 2.299482 1.039752 3.559213 0.3728907 4.226074
plot(forecast(mod3, h=10))
We will now fit ARIMA models for some more data to practice!
plot(WWWusage)
mod4 <- auto.arima(WWWusage)
mod4
## Series: WWWusage
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.6504 0.5256
## s.e. 0.0842 0.0896
##
## sigma^2 estimated as 9.995: log likelihood=-254.15
## AIC=514.3 AICc=514.55 BIC=522.08
mycast <- forecast(mod4,h=20)
plot(mycast)
############# Money time
# Pull data from Yahoo finance
getSymbols('TECHM.NS', from='2012-01-01', to='2015-01-01')
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
##
## WARNING: There have been significant changes to Yahoo Finance data.
## Please see the Warning section of '?getSymbols.yahoo' for details.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.yahoo.warning"=FALSE).
## Warning: TECHM.NS contains missing values. Some functions will not work if
## objects contain missing values in the middle of the series. Consider using
## na.omit(), na.approx(), na.fill(), etc to remove or replace them.
## [1] "TECHM.NS"
# Select the relevant close price series
stock_prices <- TECHM.NS[,4]
mod5 <- auto.arima(stock_prices)
mycast5 <- forecast(mod5, h=10)
plot(mycast5)
mod6 <- auto.arima(log(stock_prices))
mycast6 <- forecast(mod6, h=10)
plot(mycast6)
par(mfrow=c(2,1))
plot(mycast5)
plot(mycast6)
###### YOUR TURN
# Fit ARIMA models with various data sets and do prediction
data("AirPassengers")
library(astsa); data(globtemp)
##
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
##
## gas
data(jj)
mod7 <- auto.arima(AirPassengers)
mod7
## Series: AirPassengers
## ARIMA(2,1,1)(0,1,0)[12]
##
## Coefficients:
## ar1 ar2 ma1
## 0.5960 0.2143 -0.9819
## s.e. 0.0888 0.0880 0.0292
##
## sigma^2 estimated as 132.3: log likelihood=-504.92
## AIC=1017.85 AICc=1018.17 BIC=1029.35
mycast7 <- forecast(mod7, h=10)
plot(mycast7)
mod8 <- auto.arima(globtemp)
mod8
## Series: globtemp
## ARIMA(1,1,1) with drift
##
## Coefficients:
## ar1 ma1 drift
## 0.3549 -0.7663 0.0072
## s.e. 0.1314 0.0874 0.0032
##
## sigma^2 estimated as 0.01011: log likelihood=119.88
## AIC=-231.76 AICc=-231.46 BIC=-220.14
mycast8 <- forecast(mod8, h=10)
plot(mycast8)
mod9 <- auto.arima(jj)
mod9
## Series: jj
## ARIMA(1,1,2)(0,1,0)[4]
##
## Coefficients:
## ar1 ma1 ma2
## -0.7921 -0.0970 -0.3945
## s.e. 0.1396 0.1802 0.1580
##
## sigma^2 estimated as 0.1834: log likelihood=-44.07
## AIC=96.14 AICc=96.68 BIC=105.61
mycast9 <- forecast(mod9, h=10)
plot(mycast9)
data(gas)
mod10 <- auto.arima(gas)
mod10
## Series: gas
## ARIMA(0,1,3)
##
## Coefficients:
## ma1 ma2 ma3
## 0.0510 0.0647 0.1447
## s.e. 0.0426 0.0401 0.0426
##
## sigma^2 estimated as 73.32: log likelihood=-1938.65
## AIC=3885.29 AICc=3885.37 BIC=3902.49
mycast10 <- forecast(mod10, h=10)
plot(mycast10)
data(gold)
mod11 <- auto.arima(gold)
mod11
## Series: gold
## ARIMA(1,1,2) with drift
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ma1 ma2 drift
## -0.1628 -0.1744 -0.0552 0.0713
## s.e. NaN NaN NaN 0.1151
##
## sigma^2 estimated as 32.47: log likelihood=-3410.82
## AIC=6831.64 AICc=6831.7 BIC=6856.69
mycast11 <- forecast(mod11, h=10)
plot(mycast11)