# loading libraries
library(fpp2)

## Loading required package: ggplot2

## Loading required package: forecast

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## Loading required package: fma

## Loading required package: expsmooth

#library(ggplot2)
library(gridExtra)

Exercise 3.1

For usnetelec, usgdp, mcopper, enplanements datasets, find an appropriate Box-Cox transformation in order to stabilise the variance.

usnetelec dataset

# Estimate the Box-Cox lambda for usnetelec (this is the parameter,
# not the transformed series)
l1 <- BoxCox.lambda(usnetelec)
print(l1)

## [1] 0.5167714

# Raw series next to its Box-Cox transformed version
a <- autoplot(usnetelec) + ggtitle("Non transformed")
b <- autoplot(BoxCox(usnetelec, l1)) + ggtitle("BoxCox Transformation")
grid.arrange(a, b, nrow = 1)

usgdp dataset

# Estimate the Box-Cox lambda for usgdp
l1 <- BoxCox.lambda(usgdp)
print(l1)

## [1] 0.366352

# Raw series next to its Box-Cox transformed version
a <- autoplot(usgdp) + ggtitle("Non transformed")
b <- autoplot(BoxCox(usgdp, l1)) + ggtitle("BoxCox Transformation")
grid.arrange(a, b, nrow = 1)

mcopper dataset

# Estimate the Box-Cox lambda for mcopper
l1 <- BoxCox.lambda(mcopper)
print(l1)

## [1] 0.1919047

# Raw series next to its Box-Cox transformed version
a <- autoplot(mcopper) + ggtitle("Non transformed")
b <- autoplot(BoxCox(mcopper, l1)) + ggtitle("BoxCox Transformation")
grid.arrange(a, b, nrow = 1)

enplanements dataset

# Estimate the Box-Cox lambda for enplanements
l1 <- BoxCox.lambda(enplanements)
print(l1)

## [1] -0.2269461

# Raw series next to its Box-Cox transformed version
a <- autoplot(enplanements) + ggtitle("Non transformed")
b <- autoplot(BoxCox(enplanements, l1)) + ggtitle("BoxCox Transformation")
grid.arrange(a, b, nrow = 1)

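As we can see, some of the datasets show more variation in the untransformed series than others, so the Box-Cox transformation changes some of the plots noticeably and others very little.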

Exercise 3.2

Why is a Box-Cox transformation unhelpful for the cangas data?

# Estimate the Box-Cox lambda for cangas
l1 <- BoxCox.lambda(cangas)
print(l1)

## [1] 0.5767759

# Raw series next to its Box-Cox transformed version
a <- autoplot(cangas) + ggtitle("Non transformed")
b <- autoplot(BoxCox(cangas, l1)) + ggtitle("BoxCox Transformation")
grid.arrange(a, b, nrow = 1)

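In this case, the transformation was not needed: the data did not show any abnormality, which is why we don't see any difference before and after the transformation in the plot above. A Box-Cox transformation only helps when the variation changes consistently with the level of the series.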

Exercise 3.3

What Box-Cox transformation would you select for your retail data (from Exercise 3 in Section 2.10)?

# Read the retail workbook, skipping the first row of the sheet
retail <- readxl::read_excel("retail.xlsx", skip = 1)

# Monthly time series built from column A3349718A, starting April 1982
tsdata <- ts(retail[, "A3349718A"], frequency = 12, start = c(1982, 4))
a <- autoplot(tsdata) + ggtitle("Non-Transformed Plot")


# Estimate the Box-Cox lambda for the retail series
l1 <- BoxCox.lambda(tsdata)
print(paste0("The lambda value is ", l1))

## [1] "The lambda value is 0.186608760950338"

# Box-Cox transformed version of the series
b <- autoplot(BoxCox(tsdata, l1)) + ggtitle("BoxCox Transformation")

# Show the raw and transformed plots side by side
grid.arrange(a, b, nrow = 1)

Overall, the trend is upward and increases over time, with a period of slight stability between 2000 and 2003; it rises again after 2003. The chosen lambda value is approximately 0.187.

Exercise 3.8

For your retail time series (from Exercise 3 in Section 2.10):

(a) Split the data into two parts using

# Training set: every observation up to and including December 2010
myts.train <- window(tsdata, end = c(2010, 12))
# Test set: every observation from the start of 2011 onwards
myts.test <- window(tsdata, start = 2011)

(b) Check that your data have been split appropriately by producing the following plot

# Overlay the training and test windows on the full series to verify the split
autoplot(tsdata) +
  autolayer(myts.train, series = "Training") +
  autolayer(myts.test, series = "Test")

(c) Calculate forecasts using snaive applied to myts.train

# Fit the seasonal naive method to the training data and plot the forecasts.
# NOTE(review): no horizon `h` is passed, so snaive() uses its default;
# confirm that it covers the full test period before comparing accuracy.
fc <- snaive(myts.train)
autoplot(fc)

(d) Compare the accuracy of your forecasts against the actual values stored in myts.test

# Accuracy measures for the snaive forecasts, evaluated against myts.test
accuracy(fc, myts.test)

##                     ME     RMSE      MAE       MPE      MAPE     MASE      ACF1
## Training set  7.073574 15.00116 11.52823  6.790115  9.574892 1.000000 0.8180097
## Test set     25.916667 28.98111 26.82500 10.150224 10.554647 2.326897 0.4757224
##              Theil's U
## Training set        NA
## Test set      2.009329

(e) Check the residuals. Do the residuals appear to be uncorrelated and normally distributed?

# Residual diagnostics for the snaive fit, including the Ljung-Box test
checkresiduals(fc)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 868.61, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24

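The residuals are not perfectly normally distributed. Most of them lie close to 0, but some spike well outside that range. The ACF plot shows values that lie outside the bounds around 0, and the Ljung-Box p-value above is very small, so the residuals do not appear to be uncorrelated.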

(f) How sensitive are the accuracy measures to the training / test split?

The errors for the training and test sets are significantly different. The test set seems to have higher errors than the training set.

Data 624 - HW2

Habib Khan

9/13/2020