Sales of new single family houses, USA, from Jan 1987 through Nov 1995

# Load the monthly sales series from the CSV export into a data frame.
houses <- read.csv(file = "sales-of-new-onefamily-houses-us.csv")

library(fpp)
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.3.2
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.3.2
## Loading required package: expsmooth
## Loading required package: lmtest
## Warning: package 'lmtest' was built under R version 3.3.2
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.3.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: tseries
## Warning: package 'tseries' was built under R version 3.3.2
library(forecast)
library(xts)
## Warning: package 'xts' was built under R version 3.3.2
library(tseries)

# Inspect the raw data frame: column types first, then the first and last
# six rows (six is the head()/tail() default, spelled out explicitly here).
utils::str(houses)
## 'data.frame':    107 obs. of  2 variables:
##  $ Month                                                              : Factor w/ 107 levels "1987-01","1987-02",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ Sales.of.new.one.family.houses..USA..from.Jan.1987.through.Nov.1995: int  53 59 73 72 62 58 55 56 52 52 ...
head(houses, n = 6L)
##     Month
## 1 1987-01
## 2 1987-02
## 3 1987-03
## 4 1987-04
## 5 1987-05
## 6 1987-06
##   Sales.of.new.one.family.houses..USA..from.Jan.1987.through.Nov.1995
## 1                                                                  53
## 2                                                                  59
## 3                                                                  73
## 4                                                                  72
## 5                                                                  62
## 6                                                                  58
tail(houses, n = 6L)
##       Month
## 102 1995-06
## 103 1995-07
## 104 1995-08
## 105 1995-09
## 106 1995-10
## 107 1995-11
##     Sales.of.new.one.family.houses..USA..from.Jan.1987.through.Nov.1995
## 102                                                                  64
## 103                                                                  64
## 104                                                                  63
## 105                                                                  55
## 106                                                                  54
## 107                                                                  44
# Give the value column a short name; the header generated from the CSV is
# unwieldy to type.
names(houses)[2] <- "sales"

# Hold out the tail of the series for evaluation: the first 86 rows are the
# training set and every remaining row is the test set. The upper bound is
# taken from nrow() rather than a literal 107 so that no observation is
# silently dropped if the CSV gains rows.
n_train <- 86L
stopifnot(nrow(houses) > n_train)  # the test set must not be empty
train <- houses[seq_len(n_train), ]
test <- houses[(n_train + 1L):nrow(houses), ]

time series

# Build monthly ts objects (frequency 12) for the full series and for each
# side of the train/test split, plotting each one as a quick visual check.
ts.houses <- ts(houses$sales, frequency = 12, start = c(1987, 1))
plot(ts.houses)

# The training rows are the leading rows of `houses`, so they share its start.
ts.train <- ts(train$sales, frequency = 12, start = start(ts.houses))
plot(ts.train)

# Start the test series one period after the training series ends instead of
# hard-coding c(1994, 3), so the two cannot drift apart if the split moves.
ts.test <- ts(
  test$sales,
  frequency = 12,
  start = tsp(ts.train)[2] + deltat(ts.train)
)
plot(ts.test)

NNAR model: NNAR(1,1,2)[12] means 1 non-seasonal lag, 1 seasonal lag (period 12), and 2 hidden nodes

# Fit a neural network autoregression to the training series. nnetar()
# averages several networks that each start from random weights, so the RNG
# seed is fixed to make the fit, forecasts, and accuracy table repeatable.
# NOTE: the output recorded below came from an unseeded run; figures from a
# seeded re-run will differ slightly.
set.seed(123)
mynnet <- nnetar(ts.train)
mynnet
## Series: ts.train 
## Model:  NNAR(1,1,2)[12] 
## Call:   nnetar(y = ts.train)
## 
## Average of 20 networks, each of which is
## a 2-2-1 network with 9 weights
## options were - linear output units 
## 
## sigma^2 estimated as 20.87
# Forecast over exactly the hold-out period (21 months with the current
# split) and overlay the actual test observations in red.
fcast.nnet <- forecast(mynnet, h = length(ts.test))
plot(fcast.nnet)
lines(ts.test, col = "red")

# Error measures on the training fit and on the held-out test data.
acc.nnet <- accuracy(fcast.nnet, ts.test)
acc.nnet
##                        ME     RMSE      MAE        MPE      MAPE      MASE
## Training set 0.0005970228 4.568286 3.535591 -0.9864026  7.443894 0.5306972
## Test set     0.0969991766 7.280519 5.979490 -1.8371097 11.140850 0.8975299
##                   ACF1 Theil's U
## Training set 0.2191136        NA
## Test set     0.4175379  1.033402

ETS

# Fit an exponential smoothing state space model to the training series.
# model = "ZZZ" leaves the error, trend, and seasonal components to be
# selected automatically; the run recorded below chose ETS(M,N,A).
myets <- ets(ts.train, model = "ZZZ")
myets
## ETS(M,N,A) 
## 
## Call:
##  ets(y = ts.train, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 0.7982 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 56.4058 
##     s=-11.7973 -7.9963 -2.5084 -2.8245 3.2282 1.8863
##            4.9342 5.215 7.4446 8.2107 0.5368 -6.3292
## 
##   sigma:  0.0681
## 
##      AIC     AICc      BIC 
## 625.2495 632.1066 662.0647
# Forecast over exactly the hold-out period (21 months with the current
# split) rather than a hard-coded horizon, then overlay the actual test
# observations in red.
fcast.myets <- forecast(myets, h = length(ts.test))
plot(fcast.myets)
lines(ts.test, col = "red")

# Error measures on the training fit and on the held-out test data.
acc.ets <- accuracy(fcast.myets, ts.test)
acc.ets
##                        ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  0.003900202 3.522896 2.716488 -0.2988822 5.413551 0.4077487
## Test set     -1.082567311 4.743547 3.871966 -2.8693053 7.129235 0.5811877
##                      ACF1 Theil's U
## Training set -0.008011892        NA
## Test set      0.394655648 0.6921578

ARIMA

# Let auto.arima() choose the ARIMA orders for the training series; the run
# recorded below selected ARIMA(0,1,0)(2,0,0)[12].
myarima <- auto.arima(ts.train)
myarima
## Series: ts.train 
## ARIMA(0,1,0)(2,0,0)[12] 
## 
## Coefficients:
##         sar1    sar2
##       0.4384  0.3052
## s.e.  0.1060  0.1192
## 
## sigma^2 estimated as 19.9:  log likelihood=-250.92
## AIC=507.83   AICc=508.13   BIC=515.16
# Forecast over exactly the hold-out period (21 months with the current
# split) rather than a hard-coded horizon, then overlay the actual test
# observations in red.
fcast.arima <- forecast(myarima, h = length(ts.test))
plot(fcast.arima)
lines(ts.test, col = "red")

# Error measures on the training fit and on the held-out test data.
acc.arima <- accuracy(fcast.arima, ts.test)
acc.arima
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  0.1112226 4.382069 3.413523 -0.1643541  6.81773 0.5123747
## Test set     -4.5775931 7.856917 6.315492 -9.8907207 12.36728 0.9479644
##                     ACF1 Theil's U
## Training set -0.05945586        NA
## Test set      0.29839682  1.219595

Compare Accuracy

Looking at the graphs, ETS appeared to fit the data best. The neural net gets the general trend but not the extremes of the peaks and valleys. ARIMA also looks like it does fairly well at capturing the general trend - better than the neural net; however, it does not look like it’s fitting the data as well as ETS. Looking at the test-set errors, the neural net actually performs best according to ME and MPE and second best according to RMSE, MAE, MAPE, and MASE, while ETS performs best according to RMSE, MAE, MAPE, and MASE. ARIMA was second best in MAE (and in RMSE, MAPE, and MASE) only on the training set; on the test set it was the worst in ME, RMSE, MAE, MPE, MAPE, and MASE, which I found surprising after the visual comparison of the neural net and ARIMA graphs. Overall, ETS still appears to be the best fit after comparing the errors.

# Reprint the three accuracy tables back to back for side-by-side comparison.
print(acc.nnet)
##                        ME     RMSE      MAE        MPE      MAPE      MASE
## Training set 0.0005970228 4.568286 3.535591 -0.9864026  7.443894 0.5306972
## Test set     0.0969991766 7.280519 5.979490 -1.8371097 11.140850 0.8975299
##                   ACF1 Theil's U
## Training set 0.2191136        NA
## Test set     0.4175379  1.033402
print(acc.ets)
##                        ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  0.003900202 3.522896 2.716488 -0.2988822 5.413551 0.4077487
## Test set     -1.082567311 4.743547 3.871966 -2.8693053 7.129235 0.5811877
##                      ACF1 Theil's U
## Training set -0.008011892        NA
## Test set      0.394655648 0.6921578
print(acc.arima)
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  0.1112226 4.382069 3.413523 -0.1643541  6.81773 0.5123747
## Test set     -4.5775931 7.856917 6.315492 -9.8907207 12.36728 0.9479644
##                     ACF1 Theil's U
## Training set -0.05945586        NA
## Test set      0.29839682  1.219595