# usnetelec: Box-Cox lambda reported as 0.52
# Estimate the Box-Cox parameter once; it is reused for the transform and the title.
lambda <- BoxCox.lambda(usnetelec)
BoxCox_transformed <- BoxCox(usnetelec, lambda)

# Original and transformed series overlaid in a single panel.
a <- autoplot(cbind(usnetelec, BoxCox_transformed)) +
  ylab("") +
  xlab("") +
  ggtitle(paste("Lambda = ", round(lambda, 2))) +
  theme(legend.position = "bottom", legend.title = element_blank())

# The same two series, one panel per series (argument name spelled out in full).
b <- autoplot(cbind(usnetelec, BoxCox_transformed), facets = TRUE) +
  ylab("") +
  xlab("")

# Place the two plots side by side.
grid.arrange(a, b, nrow = 1)
# usgdp: Box-Cox lambda reported as 0.37
# Estimate the Box-Cox parameter once; it is reused for the transform and the title.
lambda <- BoxCox.lambda(usgdp)
BoxCox_transformed <- BoxCox(usgdp, lambda)

# Original and transformed series overlaid in a single panel.
a <- autoplot(cbind(usgdp, BoxCox_transformed)) +
  ylab("") +
  xlab("") +
  ggtitle(paste("Lambda = ", round(lambda, 2))) +
  theme(legend.position = "bottom", legend.title = element_blank())

# The same two series, one panel per series (argument name spelled out in full).
b <- autoplot(cbind(usgdp, BoxCox_transformed), facets = TRUE) +
  ylab("") +
  xlab("")

# grid.arrange() controls the side-by-side layout; base-graphics par() settings
# do not apply to ggplot objects, so none are set here.
grid.arrange(a, b, nrow = 1)
# mcopper: Box-Cox lambda reported as 0.19
# Estimate the Box-Cox parameter once; it is reused for the transform and the title.
lambda <- BoxCox.lambda(mcopper)
BoxCox_transformed <- BoxCox(mcopper, lambda)

# Original and transformed series overlaid in a single panel.
a <- autoplot(cbind(mcopper, BoxCox_transformed)) +
  ylab("") +
  xlab("") +
  ggtitle(paste("Lambda = ", round(lambda, 2))) +
  theme(legend.position = "bottom", legend.title = element_blank())

# The same two series, one panel per series (argument name spelled out in full).
b <- autoplot(cbind(mcopper, BoxCox_transformed), facets = TRUE) +
  ylab("") +
  xlab("")

# Place the two plots side by side.
grid.arrange(a, b, nrow = 1)
# enplanements: Box-Cox lambda reported as -0.23
# Estimate the Box-Cox parameter once; it is reused for the transform and the title.
lambda <- BoxCox.lambda(enplanements)
BoxCox_transformed <- BoxCox(enplanements, lambda)

# Original and transformed series overlaid in a single panel.
a <- autoplot(cbind(enplanements, BoxCox_transformed)) +
  ylab("") +
  xlab("") +
  ggtitle(paste("Lambda = ", round(lambda, 2))) +
  theme(legend.position = "bottom", legend.title = element_blank())

# The same two series, one panel per series (argument name spelled out in full).
b <- autoplot(cbind(enplanements, BoxCox_transformed), facets = TRUE) +
  ylab("") +
  xlab("")

# Place the two plots side by side.
grid.arrange(a, b, nrow = 1)
# cangas: estimate the Box-Cox parameter once and reuse it for the transform
# and the plot title.
lambda <- BoxCox.lambda(cangas)
BoxCox_transformed <- BoxCox(cangas, lambda)

# Original and transformed series overlaid in a single panel.
a <- autoplot(cbind(cangas, BoxCox_transformed)) +
  ylab("") +
  xlab("") +
  ggtitle(paste("Lambda = ", round(lambda, 2))) +
  theme(legend.position = "bottom", legend.title = element_blank())

# The same two series, one panel per series (argument name spelled out in full).
b <- autoplot(cbind(cangas, BoxCox_transformed), facets = TRUE) +
  ylab("") +
  xlab("")

# Place the two plots side by side.
grid.arrange(a, b, nrow = 1)
Per Hyndman and Athanasopoulos, a transformation can be useful if the data show variation that increases or decreases with the level of the series. That does not seem to be the case with cangas, where the variation increases in the middle of the series (after ~1975) and shrinks back to roughly its earlier level after ~1990, which renders the Box-Cox transformation ineffective here.
# Download the retail workbook. An .xlsx file is binary, so request binary mode
# explicitly; the default text mode can corrupt the file on Windows.
download.file(
  "https://otexts.com/fpp2/extrafiles/retail.xlsx",
  "retail.xlsx",
  mode = "wb"
)

# Read the sheet, skipping its first row.
retaildata <- readxl::read_excel("retail.xlsx", skip = 1)

# Monthly series (frequency 12) for column A3349398A, starting in April 1982.
myts <- ts(
  retaildata[, "A3349398A"],
  frequency = 12,
  start = c(1982, 4)
)
# Retail series: estimate the Box-Cox parameter once and reuse it for the
# transform and the plot title.
lambda <- BoxCox.lambda(myts)
BoxCox_transformed <- BoxCox(myts, lambda)

# Original and transformed series overlaid in a single panel.
a <- autoplot(cbind(myts, BoxCox_transformed)) +
  ylab("") +
  xlab("") +
  ggtitle(paste("Lambda = ", round(lambda, 2))) +
  theme(legend.position = "bottom", legend.title = element_blank())

# The same two series, one panel per series (argument name spelled out in full).
b <- autoplot(cbind(myts, BoxCox_transformed), facets = TRUE) +
  ylab("") +
  xlab("")

# Place the two plots side by side.
grid.arrange(a, b, nrow = 1)
# Hold out everything from January 2011 onward as the test set; the training
# set is all observations up to and including December 2010.
myts.test <- window(myts, start = c(2011, 1))
myts.train <- window(myts, end = c(2010, 12))

# Full series with the training and test partitions drawn on top of it.
autoplot(myts) +
  autolayer(myts.train, series = "Training") +
  autolayer(myts.test, series = "Test") +
  theme(legend.title = element_blank())
# Seasonal naive forecast fitted on the training window.
fc <- snaive(myts.train)

# Full series with the forecast overlaid.
autoplot(myts) +
  autolayer(fc, series = "Naive") +
  theme(legend.title = element_blank())

# Accuracy measures for the training fit and against the held-out test data.
accuracy(fc, myts.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 73.94114 88.31208 75.13514 6.068915 6.134838 1.000000 0.6312891
## Test set 115.00000 127.92727 115.00000 4.459712 4.459712 1.530576 0.2653013
## Theil's U
## Training set NA
## Test set 0.7267171
# Residual diagnostics for the seasonal naive forecast; the Ljung-Box test
# output is shown below.
checkresiduals(fc)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 671.41, df = 24, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 24
Do the residuals appear to be uncorrelated and normally distributed?
No. As is evident from the ACF plot, the residuals appear to be correlated, meaning there is information in the data that is not captured by the seasonal naive method. The Ljung-Box test confirms this: the large Q* value is significant, so the residuals do not come from a white noise series. The distribution of the residuals appears to be slightly right-skewed, indicating non-normality. Additionally, the residuals fan out, indicating that the variance is not constant. When the forecast is plotted against the actual values, we can see that the trend is not captured: the forecast values do not rise as much as the actual data.
Splitting the data into training and test sets at a few different points shows that the accuracy measures are quite sensitive to the split, as the metrics vary significantly. In all cases, however, accuracy on the test set is much worse than on the training set, meaning the model is not appropriate for this series and does not generalize well.
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 74.29751 88.86355 75.4676 6.197542 6.263306 1.000000 0.6392016
## Test set 104.89583 120.93167 106.6792 4.180821 4.249077 1.413576 0.4949489
## Theil's U
## Training set NA
## Test set 0.6631145
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 72.14725 86.06616 73.36278 6.233472 6.301790 1.000000 0.6555584
## Test set 161.87083 174.58604 161.87083 6.524281 6.524281 2.206443 0.3346350
## Theil's U
## Training set NA
## Test set 0.9578964
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 73.39966 86.51123 73.9202 6.413359 6.450312 1.000000 0.6485357
## Test set 105.98333 138.04400 115.1917 4.390494 4.812791 1.558325 0.6124800
## Theil's U
## Training set NA
## Test set 0.7965429