## Warning: package 'fpp' was built under R version 3.3.1
## Warning: package 'fma' was built under R version 3.3.1
## Warning: package 'expsmooth' was built under R version 3.3.1
## Warning: package 'lmtest' was built under R version 3.3.1

Exercise 2.1

Monthly total of people on unemployment benefits in Australia (January 1956–July 1992)

# The fpp package needs to be loaded once at the beginning of every R session.
# library() stops with an error when the package is not installed, whereas
# require() only warns and returns FALSE, which would postpone the failure to
# the first use of a function or data set from the package.
library(fpp)

# Estimate the Box-Cox parameter for the dole series, then plot the
# transformed series; the rounded parameter value is shown in the y-axis label.
lambda <- BoxCox.lambda(dole)
plot(
  BoxCox(dole, lambda),
  xlab = "Time",
  ylab = paste("BoxCox(# people,", round(lambda, 2), ")")
)

The data was transformed using a Box-Cox transformation with parameter \(\lambda=0.33\).

Monthly total of accidental deaths in the United States (January 1973–December 1978)

# Time plot of the usdeaths series on its original scale.
plot(
  usdeaths,
  xlab = "Time",
  ylab = "# deaths"
)

Quarterly production of bricks (in millions of units) at Portland, Australia (March 1956–September 1994)

# Estimate the Box-Cox parameter for bricksq, then plot the transformed
# series; the rounded parameter value is shown in the y-axis label.
lambda <- BoxCox.lambda(bricksq)
plot(
  BoxCox(bricksq, lambda),
  xlab = "Time",
  ylab = paste("BoxCox(# mln bricks,", round(lambda, 2), ")")
)

The data was transformed using a Box-Cox transformation with parameter \(\lambda=0.25\).

\newpage

Exercise 2.2

Time plot of the Dow Jones index:

# Time plot of the dowjones series.
plot(
  dowjones,
  xlab = "Time",
  ylab = "Value $"
)

Forecasts using the drift method:

# Plot the 20-step-ahead forecasts produced by rwf() with drift enabled;
# main = "" leaves the plot without a title.
plot(
  rwf(dowjones, drift = TRUE, h = 20),
  xlab = "Time",
  ylab = "Value $",
  main = ""
)

Graphed forecasts are identical to extending the line drawn between the first and last observations:

# Re-draw the drift forecasts and overlay the straight line that passes
# through the first and the last observation of the series.
plot(
  rwf(dowjones, drift = TRUE, h = 20),
  xlab = "Time",
  ylab = "Value $",
  main = ""
)

# Index the series directly to obtain plain numbers. head()/tail() may return
# ts objects (they do in recent forecast/R releases), and subtracting two ts
# objects whose time indices do not overlap fails with
# "non-intersecting series".
n_obs <- length(dowjones)
first_value <- dowjones[1]
last_value <- dowjones[n_obs]

# Use the series' own start and end times instead of assuming that time
# starts at 1; for a series indexed 1..n this gives the same line as
# slope = (last - first) / (n - 1) and intercept = first - slope.
first_time <- tsp(dowjones)[1]
last_time <- tsp(dowjones)[2]

slope <- (last_value - first_value) / (last_time - first_time)
intercept <- first_value - slope * first_time
abline(intercept, slope, lty = 2, col = "red")

Some other benchmark methods:

# Overlay three benchmark forecasts (h = 20) of the dowjones series.
# level = 0 is passed to every method, presumably so that the prediction
# intervals collapse onto the point forecasts - confirm against the forecast
# package documentation.
# fcol fixes the colour of the forecast line explicitly so that it matches the
# "blue" legend entry regardless of the active colour palette.
plot(
  rwf(dowjones, drift = TRUE, h = 20, level = 0),
  xlab = "Time",
  ylab = "Value $",
  main = "",
  fcol = "blue"
)

# "$mean" after the function calls below is used
# to extract vector of the forecasted values from the function output.
# xlab/ylab/main are not passed to lines(): it only adds to an existing plot,
# so axis labels and titles have no effect there.
lines(naive(dowjones, h = 20, level = 0)$mean, col = "green")
lines(meanf(dowjones, h = 20, level = 0)$mean, col = "red")

legend(
  "topleft",
  legend = c("Random walk with drift", "Random walk without drift", "Mean forecast"),
  col = c("blue", "green", "red"),
  lty = 1
)

The Random Walk Forecast (rwf) methods, with and without drift, are likely the best of these benchmark forecasting methods.

\newpage

Exercise 2.3

Plot of daily closing IBM stock prices (data set ibmclose):

# Time plot of the ibmclose series; main = "" leaves the plot without a title.
plot(
  ibmclose,
  xlab = "Time",
  ylab = "Value $",
  main = ""
)

Splitting the data into a training set of 300 observations and a test set of 69 observations:

# Time points 1-300 form the training set; time points 301-369 are held out
# as the test set.
trainingSet <- window(ibmclose, start = 1, end = 300)
testSet <- window(ibmclose, start = 301, end = 369)

Benchmark methods are applied to the training set to produce forecasts for the test period. The forecasts are then compared against the test set.

# Produce 69-step-ahead forecasts from the training set with three benchmark
# methods: rwf() without drift, rwf() with drift, and meanf().
# h = 69 is the number of observations held out in the test set, and "$mean"
# keeps only the vector of forecasted values from each function's output.
rwfForecast = rwf(trainingSet, h = 69)$mean
rwfWithDriftForecast = rwf(trainingSet, drift = TRUE, h = 69)$mean
meanfForecast = meanf(trainingSet, h = 69)$mean
# Comparing the above forecasts: each accuracy() call below reports the error
# measures of one forecast vector against the held-out test set; the captured
# console output follows each call.
accuracy(rwfForecast, testSet)
                ME    RMSE      MAE       MPE     MAPE      ACF1 Theil's U
Test set -3.724638 20.2481 17.02899 -1.293917 4.668186 0.9314689  2.973486
accuracy(rwfWithDriftForecast, testSet)
               ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
Test set 6.108138 17.06696 13.97475 1.419201 3.707888 0.9045875  2.361092
accuracy(meanfForecast, testSet)
               ME     RMSE     MAE       MPE     MAPE      ACF1 Theil's U
Test set -130.618 132.1256 130.618 -35.47882 35.47882 0.9314689  19.05515

According to RMSE (Root Mean Square Error) the best forecast is provided by the Random Walk Forecast with Drift method.
MAE (Mean Absolute Error) and MAPE (Mean Absolute Percentage Error) lead to the same conclusion.

\newpage

Exercise 2.4

Plot of monthly sales of new one-family houses in the USA, Jan 1973 – Nov 1995 (data set hsales):

# Time plot of the hsales series; main = "" leaves the plot without a title.
plot(
  hsales,
  xlab = "Time",
  ylab = "Sales",
  main = ""
)

Splitting the hsales data set into a training set and a test set, where the test set is the last two years of data (January 1994 – November 1995, i.e. 23 months).

# Everything up to and including December 1993 is used for training;
# observations from January 1994 onwards are held out as the test set.
trainingSet <- window(hsales, end = c(1993, 12))
testSet <- window(hsales, start = c(1994, 1))

Benchmark methods are applied to the training set to produce forecasts for the test period. The forecasts are then compared against the test set.

# Produce 23-step-ahead forecasts from the training set with four benchmark
# methods: rwf() without drift, rwf() with drift, meanf(), and snaive().
# h = 23 covers the held-out period that starts in January 1994, and "$mean"
# keeps only the vector of forecasted values from each function's output.
rwfForecast = rwf(trainingSet, h = 23)$mean
rwfWithDriftForecast = rwf(trainingSet, drift = TRUE, h = 23)$mean
meanfForecast = meanf(trainingSet, h = 23)$mean
snaiveForecast = snaive(trainingSet, h = 23)$mean
# Comparing the above forecasts: each accuracy() call below reports the error
# measures of one forecast vector against the held-out test set; the captured
# console output follows each call.
accuracy(rwfForecast, testSet)
         ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
Test set  5 9.670664 8.304348 6.808018 14.38167 0.5095178  1.179633
accuracy(rwfWithDriftForecast, testSet)
               ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
Test set 5.191235 9.761548 8.393037 7.159951 14.50303 0.5083059  1.188562
accuracy(meanfForecast, testSet)
               ME     RMSE      MAE     MPE     MAPE      ACF1 Theil's U
Test set 4.051587 9.216133 7.850759 5.07499 13.75973 0.5095178   1.13105
accuracy(snaiveForecast, testSet)
                ME     RMSE MAE        MPE    MAPE     ACF1 Theil's U
Test set 0.3043478 6.160886   5 -0.7312374 9.12828 0.224307 0.8031005

According to RMSE (Root Mean Square Error) the best forecast is provided by the Seasonal Naive method.
MAE (Mean Absolute Error) and MAPE (Mean Absolute Percentage Error) lead to the same conclusion.