Load the fma and other packages:
library(fma)
library(ggplot2)
library(gridExtra)
library(forecast)
- For each of the following series (from the fma package), make a graph of the data. If transforming seems appropriate, do so and describe the effect.
- To plot a transformed data set, use plot(BoxCox(x,0.5)) where x is the name of the data set and 0.5 is the Box-Cox parameter.
autoplot(dole/1000, main = "Monthly Unemployment Benefit Rolls, Australia",
ylab = "Unemployment Benefit Recipients, '000s",
xlab ="Year")
autoplot(usdeaths/1000, main = "Monthly Accidental Death Count, US",
xlab = "Year", ylab = "Accidental Deaths, '000s")
autoplot(bricksq, main = "Quarterly Production of Bricks, millions",
xlab = "Year", ylab = "Bricks, millions")
- Consider the daily closing IBM stock prices (data set ibmclose). Produce some plots of the data in order to become familiar with it.
closing <- autoplot(ibmclose, main = "Daily Closing Price of IBM")
diff <- autoplot(diff(ibmclose), main = "Daily Change in Closing Price, $, IBM")
grid.arrange(closing, diff, ncol = 1)
checkresiduals(diff(ibmclose), main = "Difference In Daily Closing Price, IBM")
Box.test(diff(ibmclose), lag = 10, type = "Ljung")
##
## Box-Ljung test
##
## data: diff(ibmclose)
## X-squared = 14.064, df = 10, p-value = 0.1701
With a p-value > 0.17, the residuals are not significantly different from white-noise.
Split the data into a training set of 300 observations and a test set of 69 observations.
train = ibmclose[1:300]
test = ibmclose[301:369]
Try various benchmark methods to forecast the training set and compare the results on the test set. Which method did best?
# Predict using the mean of the train set
meann <- meanf(train, h = length(test))
#autoplot(meann)
#accuracy(meann, ibmclose)
# Predict using the last value of the train set
last <- naive(train, h = length(test))
autoplot(last)
accuracy(last, ibmclose)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.2809365 7.302815 5.09699 -0.08262872 1.115844 1.000000
## Test set -3.7246377 20.248099 17.02899 -1.29391743 4.668186 3.340989
## ACF1 Theil's U
## Training set 0.1351052 NA
## Test set 0.9314689 2.973486
# Predict using a simple trend line between the first and last day in the train set
drift <- rwf(train, h= length(test), drift=TRUE)
autoplot(drift)
accuracy(drift, ibmclose)
## ME RMSE MAE MPE MAPE
## Training set 2.870480e-14 7.297409 5.127996 -0.02530123 1.121650
## Test set 6.108138e+00 17.066963 13.974747 1.41920066 3.707888
## MASE ACF1 Theil's U
## Training set 1.006083 0.1351052 NA
## Test set 2.741765 0.9045875 2.361092
meants <- ts(rep(mean(train), length(test)), start = length(train) + 1)
plot(ibmclose, plot.conf=FALSE,
main="Forecasts for IBM Closing Price")
## Warning in plot.window(xlim, ylim, log, ...): "plot.conf" is not a
## graphical parameter
## Warning in title(main = main, xlab = xlab, ylab = ylab, ...): "plot.conf"
## is not a graphical parameter
## Warning in axis(1, ...): "plot.conf" is not a graphical parameter
## Warning in axis(2, ...): "plot.conf" is not a graphical parameter
## Warning in box(...): "plot.conf" is not a graphical parameter
lines(meants, col = 4)
lines(last$mean,col=2)
lines(drift$mean,col=3)
legend("topright",lty=1,col=c(4,2,3),
legend=c("Mean","Naive","Drift"))