For each of the following series (from the fma package), make a graph of the data. If transforming seems appropriate, do so and describe the effect.
Monthly total of people on unemployed benefits in Australia (January 1956–July 1992).
library("fma")
## Loading required package: forecast
head(dole,20)
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
## 1956 4742 6128 6494 5379 6011 7003 9164 10333 9614 9545 10096
## 1957 15711 13135 13077 15453 15995 18071 20291 20175
## Dec
## 1956 13277
## 1957
#First, a general plot
plot(dole,
main="Unemployed benefits in Australia",
xlab="Year", ylab="People")
#Seasonal plot:
#no obvious seasonal patterns, other than some decline throughout the year with a correction in December
seasonplot(dole, ylab="People", xlab="Year",
main="Season plot: Unemployed benefits in Australia",
year.labels=TRUE, year.labels.left=TRUE, col=1:20, pch=19)
#Attempt to smooth months out by days
monthdays <- rep(c(31,28,31,30,31,30,31,31,30,31,30,31),36)
monthdays[26 + (4*12)*(0:8)] <- 29
par(mfrow=c(2,1))
plot(dole, main="Monthly total of people on unemployed benefits in Australia",
ylab="People", xlab="Years")
plot(dole/monthdays, main="Average total of people on unemployed benefits in Australia per day",
ylab="People", xlab="Years")
## Warning in `/.default`(dole, monthdays): longer object length is not a
## multiple of shorter object length
#Correct for population growth (data from http://www.abs.gov.au/AUSSTATS/abs@.nsf/DetailsPage/3105.0.65.0012014?OpenDocument)
yearpop <- c(4828846, 4930160, 5026095, 5132363, 5253073, 5374304, 5470040, 5571613, 5682962, 5793629, 5890642, 5992280, 6108185, 6238338, 6364877, 6632838, 6735679, 6835488, 6941940, 7002232, 7065779, 7145407, 7213574, 7293341, 7391427, 7514339, 7633179, 7730415, 7826368, 7940033, 8057320, 8181746, 8325068, 8446656, 8560533, 8659623, 8744793)
yearpopmon <- rep(yearpop, each=12)
mondiff <- length(yearpopmon) - length(dole)
yearpopmon <- yearpopmon[1:(length(yearpopmon)-mondiff)]
par(mfrow=c(2,1))
plot(dole, main="Monthly total of people on unemployed benefits in Australia",
ylab="People", xlab="Years")
plot(dole/yearpopmon, main="Average total of people on unemployed benefits in Australia per day",
ylab="People", xlab="Years")
Monthly total of accidental deaths in the United States (January 1973–December 1978).
usdeaths
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
## 1973 9007 8106 8928 9137 10017 10826 11317 10744 9713 9938 9161
## 1974 7750 6981 8038 8422 8714 9512 10120 9823 8743 9129 8710
## 1975 8162 7306 8124 7870 9387 9556 10093 9620 8285 8433 8160
## 1976 7717 7461 7776 7925 8634 8945 10078 9179 8037 8488 7874
## 1977 7792 6957 7726 8106 8890 9299 10625 9302 8314 8850 8265
## 1978 7836 6892 7791 8129 9115 9434 10484 9827 9110 9070 8633
## Dec
## 1973 8927
## 1974 8680
## 1975 8034
## 1976 8647
## 1977 8796
## 1978 9240
plot(usdeaths)
#Seasonal plot:
#more accidental deaths around the summer months, peaking in July
seasonplot(usdeaths, ylab="People", xlab="Year",
main="Season plot: Accidental deaths in America",
year.labels=TRUE, year.labels.left=TRUE, col=1:20, pch=19)
#correct for days (although probably not necessary)
monthdays <- rep(c(31,28,31,30,31,30,31,31,30,31,30,31),6)
par(mfrow=c(2,1))
plot(usdeaths/monthdays, main="Average total of accidental deaths per day",
ylab="People", xlab="Years")
plot(usdeaths, main="Average total of accidental deaths per month",
ylab="People", xlab="Years")
Quarterly production of bricks (in millions of units) at Portland, Australia (March 1956–September 1994).
par(mfrow=c(3,1))
plot(bricksq, ylab="Bricks (Millions)", xlab="Year", main="Quarterly production of bricks at Portland, Australia")
#Log transformation
plot(log(bricksq), ylab="Transofrmed brick production", xlab="year", main="Transformed quarterly brick production")
title(main="Log",line=-1)
#box-cox
lambda <-BoxCox.lambda(bricksq)
plot(BoxCox(bricksq,lambda))
title(main="BoxCox",line=-1)
Consider the daily closing IBM stock prices (data set ibmclose). Produce some plots of the data in order to become familiar with it.
par(mfrow=c(2,1))
plot(ibmclose, main="Closing IBM stock prices", ylab="IBM stock price", xlab="Day")
#boxcox
labmda <-BoxCox.lambda(ibmclose)
plot(BoxCox(ibmclose,lambda))
Split the data into a training set of 300 observations and a test set of 69 observations.
set.seed(101)
sample <- sample.int(n = 369, size = 300, replace = F)
fulllist <- 0:369
rsample <- fulllist[!(fulllist %in% sample)]
train <- ibmclose[sort(sample)]
test <- ibmclose[sort(rsample)]
train1 <- ibmclose[0:300]
test1 <- ibmclose[301:369]
Try various benchmark methods to forecast the training set and
ibm2 <- window(ibmclose, start=1, end=300)
ibmfit1 <- meanf(ibm2, h=69)
ibmfit2 <- rwf(ibm2, h=69)
ibmfit3 <- snaive(ibm2, h=69)
plot(ibmfit1)
lines(ibmfit2$mean,col=2)
lines(ibmfit3$mean,col=3)
lines(ibmclose)
#We see that the naive and drift give the same forecast here
compare the results on the test set. Which method did best?
ibm3 <- window(ibmclose, start=301)
accuracy(ibmfit1, ibm3)
## ME RMSE MAE MPE MAPE
## Training set 1.660438e-14 73.61532 58.72231 -2.642058 13.03019
## Test set -1.306180e+02 132.12557 130.61797 -35.478819 35.47882
## MASE ACF1 Theil's U
## Training set 11.52098 0.9895779 NA
## Test set 25.62649 0.9314689 19.05515
accuracy(ibmfit2, ibm3)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.2809365 7.302815 5.09699 -0.08262872 1.115844 1.000000
## Test set -3.7246377 20.248099 17.02899 -1.29391743 4.668186 3.340989
## ACF1 Theil's U
## Training set 0.1351052 NA
## Test set 0.9314689 2.973486
accuracy(ibmfit3, ibm3)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.2809365 7.302815 5.09699 -0.08262872 1.115844 1.000000
## Test set -3.7246377 20.248099 17.02899 -1.29391743 4.668186 3.340989
## ACF1 Theil's U
## Training set 0.1351052 NA
## Test set 0.9314689 2.973486
#The naive and drift methods perform far better than the simple mean forecast