# loading libraries
library(fpp2)
## Loading required package: ggplot2
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: fma
## Loading required package: expsmooth
#library(ggplot2)
library(gridExtra)
For the usnetelec, usgdp, mcopper, and enplanements datasets, find an appropriate Box-Cox transformation in order to stabilise the variance.
# usnetelec: estimate the Box-Cox parameter. BoxCox.lambda() only selects
# lambda; the transformation itself is applied by BoxCox() further down.
l1 <- BoxCox.lambda(usnetelec)
print(l1)
## [1] 0.5167714
# Build the raw and the transformed plot, then show them side by side.
# NOTE(review): autoplot() appears to derive its default y-axis label from the
# plotted expression, so `l1` and the BoxCox() call are kept verbatim - confirm
# before renaming.
plot_raw <- autoplot(usnetelec) +
  labs(title = "Non transformed")
plot_boxcox <- autoplot(BoxCox(usnetelec, l1)) +
  labs(title = "BoxCox Transformation")
grid.arrange(plot_raw, plot_boxcox, nrow = 1)
# usgdp: estimate the Box-Cox parameter (the series is not transformed yet).
l1 <- BoxCox.lambda(usgdp)
print(l1)
## [1] 0.366352
# Left panel: original series; right panel: series after BoxCox() with l1.
plot_raw <- autoplot(usgdp) +
  labs(title = "Non transformed")
plot_boxcox <- autoplot(BoxCox(usgdp, l1)) +
  labs(title = "BoxCox Transformation")
grid.arrange(plot_raw, plot_boxcox, nrow = 1)
## mcopper dataset
# mcopper: estimate the Box-Cox parameter (the series is not transformed yet).
l1 <- BoxCox.lambda(mcopper)
print(l1)
## [1] 0.1919047
# Left panel: original series; right panel: series after BoxCox() with l1.
plot_raw <- autoplot(mcopper) +
  labs(title = "Non transformed")
plot_boxcox <- autoplot(BoxCox(mcopper, l1)) +
  labs(title = "BoxCox Transformation")
grid.arrange(plot_raw, plot_boxcox, nrow = 1)
# enplanements: estimate the Box-Cox parameter (not transformed yet).
l1 <- BoxCox.lambda(enplanements)
print(l1)
## [1] -0.2269461
# Left panel: original series; right panel: series after BoxCox() with l1.
plot_raw <- autoplot(enplanements) +
  labs(title = "Non transformed")
plot_boxcox <- autoplot(BoxCox(enplanements, l1)) +
  labs(title = "BoxCox Transformation")
grid.arrange(plot_raw, plot_boxcox, nrow = 1)
As we can see, some of the datasets had more variance to stabilise than others, so the effect of the transformation differs from one dataset to the next.
Why is a Box-Cox transformation unhelpful for the cangas data?
# cangas: estimate the Box-Cox parameter (the series is not transformed yet).
l1 <- BoxCox.lambda(cangas)
print(l1)
## [1] 0.5767759
# Left panel: original series; right panel: series after BoxCox() with l1.
plot_raw <- autoplot(cangas) +
  labs(title = "Non transformed")
plot_boxcox <- autoplot(BoxCox(cangas, l1)) +
  labs(title = "BoxCox Transformation")
grid.arrange(plot_raw, plot_boxcox, nrow = 1)
In this case, the transformation was not needed because the data did not show any abnormality; that is why we do not see any difference before and after the transformation in the plot above.
What Box-Cox transformation would you select for your retail data (from Exercise 3 in Section 2.10)?
# Read the retail workbook, skipping the first row of the sheet.
retail <- readxl::read_excel("retail.xlsx", skip = 1)

# Turn the chosen column into a monthly time series starting in April 1982.
retail_column <- retail[, "A3349718A"]
tsdata <- ts(retail_column, frequency = 12, start = c(1982, 4))

# Estimate the Box-Cox parameter for the series and report it.
l1 <- BoxCox.lambda(tsdata)
lambda_message <- paste0("The lambda value is ", l1)
print(lambda_message)
## [1] "The lambda value is 0.186608760950338"

# Plot of the untransformed series.
a <- autoplot(tsdata) +
  labs(title = "Non-Transformed Plot")

# Plot of the series after applying BoxCox() with the estimated lambda.
b <- autoplot(BoxCox(tsdata, l1)) +
  labs(title = "BoxCox Transformation")

# Show both plots next to each other in a single row.
grid.arrange(a, b, nrow = 1)
Overall, the trend is upward: the series increases over time, levels off slightly between 2000 and 2003, and rises again after 2003. The chosen lambda value is approximately 0.187.
For your retail time series (from Exercise 3 in Section 2.10):
# Split the series: everything up to December 2010 is used for training,
# everything from the start of 2011 onwards is held out for testing.
myts.train <- window(tsdata, end = c(2010, 12))
myts.test <- window(tsdata, start = 2011)

# Overlay the two partitions on the full series to check the split visually.
autoplot(tsdata) +
  autolayer(myts.train, series = "Training") +
  autolayer(myts.test, series = "Test")

# Seasonal naive forecast fitted on the training window only; no horizon is
# passed, so snaive() falls back to its default.
fc <- snaive(myts.train)
autoplot(fc)

# Compare the forecasts against the held-out test window.
accuracy(fc, myts.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 7.073574 15.00116 11.52823 6.790115 9.574892 1.000000 0.8180097
## Test set 25.916667 28.98111 26.82500 10.150224 10.554647 2.326897 0.4757224
## Theil's U
## Training set NA
## Test set 2.009329
The errors for the training and test sets are noticeably different: the test set has higher errors than the training set (for example, an RMSE of 28.98 versus 15.00).