library(fpp2)

## Warning: package 'fpp2' was built under R version 3.5.2

## Loading required package: ggplot2

## Loading required package: forecast

## Warning: package 'forecast' was built under R version 3.5.2

## Loading required package: fma

## Warning: package 'fma' was built under R version 3.5.2

## Loading required package: expsmooth

## Warning: package 'expsmooth' was built under R version 3.5.2

Question1

Data set books contains the daily sales of paperback and hardcover books at the same store. The task is to forecast the next four days’ sales for paperback and hardcover books.

#- Plot the series and discuss the main features of the data.
?books

## starting httpd help server ... done

autoplot(books)+
  ggtitle("Sales of paperback and hardcover books")+
  xlab("Time in Days")+
  ylab("Sales")

plot(books)

#Here we are doing bivariate analysis and both the variables are having positive trend. 
#However trend is positive for both, the variance is high for both the series.
#Correlation is low for both the series

#- Use the ses() function to forecast each series, and plot the forecasts.
#- Compute the RMSE values for the training data in each case.

Paperback <- books[,1]
Hardcover <- books[,2]

pb.ses.fit <- ses(Paperback, h=4)
round(accuracy(pb.ses.fit),2)

##                ME  RMSE   MAE  MPE  MAPE MASE  ACF1
## Training set 7.18 33.64 27.84 0.47 15.58  0.7 -0.21

pb.ses.fit[["model"]]

## Simple exponential smoothing 
## 
## Call:
##  ses(y = Paperback, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 0.1685 
## 
##   Initial states:
##     l = 170.8271 
## 
##   sigma:  34.8183
## 
##      AIC     AICc      BIC 
## 318.9747 319.8978 323.1783

autoplot(Paperback) +
autolayer(fitted(pb.ses.fit), series="Fitted") +
  ggtitle("Sales of paperback books")+
  xlab("Time in Days")+
  ylab("Sales")

hc.ses.fit <- ses(Hardcover, h=4)
round(accuracy(hc.ses.fit),2)

##                ME  RMSE   MAE  MPE  MAPE MASE  ACF1
## Training set 9.17 31.93 26.77 2.64 13.39  0.8 -0.14

hc.ses.fit[["model"]]

## Simple exponential smoothing 
## 
## Call:
##  ses(y = Hardcover, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 0.3283 
## 
##   Initial states:
##     l = 149.2861 
## 
##   sigma:  33.0517
## 
##      AIC     AICc      BIC 
## 315.8506 316.7737 320.0542

autoplot(Hardcover) +
autolayer(fitted(hc.ses.fit), series="Fitted") +
  ggtitle("Sales of hardcover books")+
  xlab("Time in Days")+
  ylab("Sales")

#- Apply Holt's linear method to the paperback and hardback series and compute four-day forecasts in each case.
pb.holt.fit <- holt(Paperback, h=4)
round(accuracy(pb.holt.fit),2)

##                 ME  RMSE   MAE   MPE  MAPE MASE  ACF1
## Training set -3.72 31.14 26.18 -5.51 15.58 0.66 -0.18

pb.holt.fit[["model"]]

## Holt's method 
## 
## Call:
##  holt(y = Paperback, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 170.699 
##     b = 1.2621 
## 
##   sigma:  33.4464
## 
##      AIC     AICc      BIC 
## 318.3396 320.8396 325.3456

autoplot(Paperback) +
  autolayer(pb.holt.fit,series="Holt's method", PI=FALSE) +
  autolayer(fitted(pb.holt.fit), series="Fitted") +
  ggtitle("Sales of paperback books")+
  xlab("Time in Days")+
  ylab("Sales")

hc.holt.fit <- holt(Hardcover, h=4)
round(accuracy(hc.holt.fit),2)

##                 ME  RMSE   MAE   MPE  MAPE MASE  ACF1
## Training set -0.14 27.19 23.16 -2.11 12.16 0.69 -0.03

hc.holt.fit[["model"]]

## Holt's method 
## 
## Call:
##  holt(y = Hardcover, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 1e-04 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 147.7935 
##     b = 3.303 
## 
##   sigma:  29.2106
## 
##      AIC     AICc      BIC 
## 310.2148 312.7148 317.2208

autoplot(Hardcover) +
  autolayer(hc.holt.fit,series="Holt's method", PI=FALSE) +
  autolayer(fitted(hc.holt.fit), series="Fitted",PI=FALSE) +
  ggtitle("Sales of hardcover books")+
  xlab("Time in Days")+
  ylab("Sales")

## Warning: Ignoring unknown parameters: PI

#- Compare the RMSE measures of Holt's method for the two series to those of simple exponential smoothing in the previous question. (Remember that Holt's method is using one more parameter than SES.) Discuss the merits of the two forecasting methods for these data sets.

accuracy(pb.ses.fit)[2]

## [1] 33.63769

accuracy(pb.holt.fit)[2]

## [1] 31.13692

accuracy(hc.ses.fit)[2]

## [1] 31.93101

accuracy(hc.holt.fit)[2]

## [1] 27.19358

#For paperback, RMSE of holts method is lower.
#For Hardcover also ,RMSE of holts method is lower

#- Compare the forecasts for the two series using both methods. Which do you think is best?

autoplot(Hardcover) +
  autolayer(hc.ses.fit,series="SES method", PI=FALSE) +
  autolayer(hc.holt.fit,series="Holt's method", PI=FALSE) +
  ggtitle("Sales of hardcover books")+
  xlab("Time in Days")+
  ylab("Sales")

autoplot(Paperback) +
  autolayer(pb.ses.fit,series="SES method", PI=FALSE) +
  autolayer(pb.holt.fit,series="Holt's method", PI=FALSE) +
  ggtitle("Sales of Paperback books")+
  xlab("Time in Days")+
  ylab("Sales")

##The below plots compares the forecast of ses and holts seperately for hardcover and paperback.Here in both the case ses forecast is a flat line.Even though the trend line is positively upwards, the forecast of holts looks to be more precise compared to the other methods.

#- Calculate a 95% prediction interval for the first forecast for each series, using the RMSE values and assuming normal errors. Compare your intervals with those produced using ses and holt
#95% PI for PAPERBACK for ses
print(paste("95% LOWER PI FOR PB SES",round(pb.ses.fit$lower[1,2],2)))

## [1] "95% LOWER PI FOR PB SES 138.87"

print(paste("95% UPPER PI FOR PB SES",round(pb.ses.fit$upper[1,2],2)))

## [1] "95% UPPER PI FOR PB SES 275.35"

#95% PI for PAPERBACK for holt
print(paste("95% LOWER PI FOR PB HOLT",round(pb.holt.fit$lower[1,2],2)))

## [1] "95% LOWER PI FOR PB HOLT 143.91"

print(paste("95% UPPER PI FOR PB HOLT",round(pb.holt.fit$upper[1,2],2)))

## [1] "95% UPPER PI FOR PB HOLT 275.02"

#95% PI for Hardcover for ses
print(paste("95% LOWER PI FOR HC SES",round(hc.ses.fit$lower[1,2],2)))

## [1] "95% LOWER PI FOR HC SES 174.78"

print(paste("95% UPPER PI FOR HC SES",round(hc.ses.fit$upper[1,2],2)))

## [1] "95% UPPER PI FOR HC SES 304.34"

#95% PI for Hardcover for holt
print(paste("95% LOWER PI FOR HC HOLT",round(hc.holt.fit$lower[1,2],2)))

## [1] "95% LOWER PI FOR HC HOLT 192.92"

print(paste("95% UPPER PI FOR HC HOLT",round(hc.holt.fit$upper[1,2],2)))

## [1] "95% UPPER PI FOR HC HOLT 307.43"

QUESTION 2

For this exercise use data set visitors, the monthly Australian short-term overseas visitors data, May 1985-April 2005.

#- Make a time plot of your data and describe the main features of the series.
autoplot(visitors)+
  ggtitle("Overseas visitors to Australia")+
  ylab("Thousands of people")+
  xlab("Year")

seasonplot(visitors)

ggsubseriesplot(visitors)+  
  ggtitle("Overseas visitors to Australia")+
  ylab("Thousands of people")+
  xlab("months")

#The autoplot shows that there is a positive upward trend in the data. There seems to be seasonality as well. To confirm that i ran the seasonplot as well as subseries to see the data at month level.
#In seasonplot the visitors are getting reduced during the monthsApril and May and then shooting up from Jun till December and Next year Jan, Feb and March.

#- Split your data into a training set and a test set comprising the last two years of available data. Forecast the test set using Holt-Winters' multiplicative method.
length(visitors)

## [1] 240

train.visitor<- window(visitors,end=c(2003,4))
test.visitor<- window(visitors,start=c(2003,5))

hw.train.visitor <- hw(train.visitor, h = 24,seasonal = "multiplicative")
autoplot(train.visitor)+
  autolayer(hw.train.visitor,series="Holt-winter method",PI=FALSE)+
 ggtitle("Holt winter method- Overseas visitors to Australia")+
  ylab("Thousands of people")+
  xlab("Year")

#To see the pi level
autoplot(hw.train.visitor) +
   ggtitle("Holt winter method- Overseas visitors to Australia")+
  ylab("Thousands of people")+
  xlab("Year")

#- Why is multiplicative seasonality necessary here?
#Here seasonality is increasing over time. That is the reason we used multiplicative method. If seasonality is constant over time, we would have used additive method.

#- Forecast the two-year test set using each of the following methods:

#  (an ETS model; an additive ETS model applied to a Box-Cox transformed series; a seasonal naïve method; an STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data. )

#ETS model
fct.ets_train.visitors<- forecast(ets(train.visitor),h=24)
autoplot(fct.ets_train.visitors)+
   ggtitle("ETS method- Overseas visitors to Australia")+
  ylab("Thousands of people")+
  xlab("Year")

#additive ETS model applied to a Box-Cox transformed series
fct.addt.ets.train.visitors <- forecast(
  ets(train.visitor, 
      lambda = BoxCox.lambda(train.visitor),
      additive.only = TRUE),
  h = 24
)
autoplot(fct.addt.ets.train.visitors)+
   ggtitle("ADDITIVE ETS TRANSFORMED- Overseas visitors to Australia")+
  ylab("Thousands of people")+
  xlab("Year")

#Seasonal naïve method
fct.snaive_train.visitors <- snaive(train.visitor, h = 24)
autoplot(fct.snaive_train.visitors)+
     ggtitle("snaive- Overseas visitors to Australia")+
  ylab("Thousands of people")+
  xlab("Year")

#STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data
fct.stl.boxcox.train.visitors <- train.visitor %>%
  stlm(
    lambda = BoxCox.lambda(train.visitor),
    s.window = 13,
    robust = TRUE,
    method = "ets"
  ) %>%
  forecast(h = 24)
autoplot(fct.stl.boxcox.train.visitors)+
     ggtitle("stl-boxcox-ets- Overseas visitors to Australia")+
  ylab("Thousands of people")+
  xlab("Year")

#- Which method gives the best forecasts? Does it pass the residual tests?
accuracy(fct.ets_train.visitors, test.visitor)

##                      ME     RMSE      MAE        MPE      MAPE     MASE
## Training set  0.7640074 14.53480 10.57657  0.1048224  3.994788 0.405423
## Test set     72.1992664 80.23124 74.55285 15.9202832 16.822384 2.857773
##                     ACF1 Theil's U
## Training set -0.05311217        NA
## Test set      0.58716982  1.127269

accuracy(fct.addt.ets.train.visitors, test.visitor)

##                     ME     RMSE      MAE        MPE      MAPE      MASE
## Training set  1.001363 14.97096 10.82396  0.1609336  3.974215 0.4149057
## Test set     69.458843 78.61032 72.41589 15.1662261 16.273089 2.7758586
##                     ACF1 Theil's U
## Training set -0.02535299        NA
## Test set      0.67684148  1.086953

accuracy(fct.snaive_train.visitors, test.visitor)

##                    ME     RMSE      MAE      MPE      MAPE     MASE
## Training set 17.29363 31.15613 26.08775 7.192445 10.285961 1.000000
## Test set     32.87083 50.30097 42.24583 6.640781  9.962647 1.619375
##                   ACF1 Theil's U
## Training set 0.6327669        NA
## Test set     0.5725430 0.6594016

accuracy(fct.stl.boxcox.train.visitors, test.visitor)

##                      ME     RMSE       MAE         MPE     MAPE      MASE
## Training set  0.5803348 13.36431  9.551391  0.08767744  3.51950 0.3661256
## Test set     76.3637263 84.24658 78.028992 16.87750474 17.51578 2.9910209
##                     ACF1 Theil's U
## Training set -0.05924203        NA
## Test set      0.64749552  1.178154

checkresiduals(fct.snaive_train.visitors, plot = TRUE)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 295.02, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24

#By comparing the RMSE, Snaive method performed better for the test data. Then Additive etl model with boxcox transformation method, then ets method and the STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data in the increasing order of RMSE.
#By seeing hte residual plot, it looks like there is some pattern in the residual plot which doesnot looks good interms of model performance. Also the mean of the residual distribution is not centered at zero and it is shifted to right. Also there is significant correlation in acf graph which shows the white noise. So overall seasonal naive is not capturing the the data well.

#- Compare the same four methods using time series cross-validation with the tsCV()function instead of using a training and test set. Do you come to the same conclusions?

##ETS model
fets <- function(x, h){
  forecast(ets(x), h = h)
}
e1 <- tsCV(visitors, fets, h = 1)

##ETS additive model with Box-Cox transformation
fets_add <- function(x, h){
  forecast(ets(x, lambda = BoxCox.lambda(x), additive.only = TRUE), h = h)
}
e2 <- tsCV(visitors, fets_add, h = 1)

##Seasonal Naïve Method
e3 <- tsCV(visitors, snaive, h = 1)

##ETS model with STL decomposition and Box-Cox transformation
fets_stl <- function(x, h){
  stlf(x, h, t.window = 13, s.window = "periodic", robust = TRUE, method = "ets", lambda = BoxCox.lambda(x))
}
e4 <- tsCV(visitors, fets_stl, h = 1)

#Compare MSE 
#  MSE for the ETS Model
mean(e1^2, na.rm = TRUE)

## [1] 343.3552

## [1] 343.3552


# MSE for the ETS Additive model
mean(e2^2, na.rm = TRUE)

## [1] 355.8652

# MSE for the Seasonal Naïve Method
mean(e3^2, na.rm = TRUE)

## [1] 1074.971

# MSE for the ETS model with STL decomposition and Box-Cox transformation
mean(e4^2, na.rm = TRUE)

## [1] 355.965

Question3

Use ets() on the following series:

bicoal, chicken, dole,

Does it always give good forecasts?
Find an example where it does not work well. Can you figure out why?

autoplot(bicoal)

bicoal.fit <- ets(bicoal)
fct.bicoal<-forecast(bicoal.fit,h=5)
autoplot(forecast(bicoal.fit))

fct.bicoal[["model"]]

## ETS(M,N,N) 
## 
## Call:
##  ets(y = bicoal) 
## 
##   Smoothing parameters:
##     alpha = 0.8205 
## 
##   Initial states:
##     l = 542.665 
## 
##   sigma:  0.1262
## 
##      AIC     AICc      BIC 
## 595.2499 595.7832 600.9253

#Only level parameter=0.82
#Prediction interval is too huge,eventhough alpha is close to 1,the forecast is purely based on latest observation, however we got a straight line,that does not fit good. not a good method for this data.

autoplot(chicken)

chicken.fit <- ets(chicken)
autoplot(forecast(chicken.fit))

fct.chicken <-forecast(chicken.fit,h=5)
fct.chicken[["model"]]

## ETS(M,N,N) 
## 
## Call:
##  ets(y = chicken) 
## 
##   Smoothing parameters:
##     alpha = 0.98 
## 
##   Initial states:
##     l = 159.8322 
## 
##   sigma:  0.1691
## 
##      AIC     AICc      BIC 
## 635.2382 635.6018 641.9836

#There is a decreasing trend .
#Alpha is 0.98 that is pretty large and it shows that forecast is purely based on latest observation and PI interval looks to be not that broad .

autoplot(dole)

dole.fit <- ets(dole)
autoplot(forecast(dole.fit))

fct.dole<- forecast(dole.fit,h=5)
fct.dole[["model"]]

## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = dole) 
## 
##   Smoothing parameters:
##     alpha = 0.697 
##     beta  = 0.124 
##     gamma = 0.303 
##     phi   = 0.902 
## 
##   Initial states:
##     l = 2708.6621 
##     b = 836.017 
##     s = 1.0404 0.8893 0.9103 1.0301 1.0576 1.0584
##            0.9801 0.9632 1.021 0.9838 1.0145 1.0514
## 
##   sigma:  0.0935
## 
##      AIC     AICc      BIC 
## 10602.67 10604.30 10676.19

#Here Y-axis scale is different for forecast.however forecast seems to be capture the trend of the data .
#PI is significantly broader and that makes this method not that great for this dataset.
#Since there is a significant variance in dataset after 1975,i would feel to add the boxcox transformation to make the data consistent.

QUESTION4

Compare ets(), snaive() and stlf() on the following three time series. For stlf(), you might need to use a Box-Cox transformation. Use a test set of three years to decide what gives the best forecasts. bricksq, dole, a10.

#bricksq data
autoplot(bricksq)

length(bricksq)

## [1] 155

bricksq_train <- subset(bricksq, end = c(143))
bricksq_test <- subset(bricksq, start = c(144))
ets_bricksq_train <- forecast(ets(bricksq_train), h = 12)
snaive_bricksq_train <- snaive(bricksq_train,  h = 12)
stlf_bricksq_train <- stlf(bricksq_train, h = 12, lambda = BoxCox.lambda(bricksq_train))

autoplot(bricksq) +
  autolayer(ets_bricksq_train,series="ETS method", PI=FALSE) +
   autolayer(snaive_bricksq_train,series="SNAIVE method", PI=FALSE)  +
   autolayer(stlf_bricksq_train,series="STLF method", PI=FALSE) +
  ggtitle("Quarterly clay brick production")+
  xlab("Time ")+
  ylab("PRODUCTION")

accuracy(ets_bricksq_train, bricksq_test)

##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  1.228708 21.96146 15.81586 0.2606171 3.912882 0.4299638
## Test set     37.682916 42.36696 37.68292 8.4157549 8.415755 1.0244329
##                   ACF1 Theil's U
## Training set 0.2038074        NA
## Test set     0.6190546  1.116608

accuracy(snaive_bricksq_train, bricksq_test)

##                     ME     RMSE      MAE      MPE     MAPE      MASE
## Training set  6.438849 50.25482 36.78417 1.430237 8.999949 1.0000000
## Test set     15.333333 37.15284 33.66667 3.231487 7.549326 0.9152487
##                     ACF1 Theil's U
## Training set  0.81169827        NA
## Test set     -0.09314867 0.9538702

accuracy(stlf_bricksq_train, bricksq_test)

##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  1.307145 20.65231 14.88304 0.3317424 3.630985 0.4046045
## Test set     39.841978 44.31471 40.65759 8.9561467 9.153632 1.1053013
##                   ACF1 Theil's U
## Training set 0.2143369        NA
## Test set     0.5207064  1.152856

#It looks like Snaive method has performed good compared to other two as it has less RMSE Value. Also in the autoplot,the snaive method looks to be forecasted better compared to other two

#dole
autoplot(dole)

dole_train <- subset(dole, end = c(427))
dole_test <- subset(dole, start = c(428))
ets_dole_train <- forecast(ets(dole_train), h = 12)
snaive_dole_train <- snaive(dole_train,  h = 12)
stlf_dole_train <- stlf(dole_train, h = 12, lambda = BoxCox.lambda(dole_train))

autoplot(dole) +
  autolayer(ets_dole_train,series="ETS method", PI=FALSE) +
   autolayer(snaive_dole_train,series="SNAIVE method", PI=FALSE)  +
   autolayer(stlf_dole_train,series="STLF method", PI=FALSE) +
  ggtitle("Unemployment benefits in Australia")+
  xlab("Time ")+
  ylab("Total people")

accuracy(ets_dole_train, dole_test)

##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set  419.4624 18184.20 10856.63 0.5111136 6.268750 0.3014767
## Test set     5143.1303 46330.29 40948.53 1.0361996 5.319935 1.1370959
##                   ACF1 Theil's U
## Training set 0.5191139        NA
## Test set     0.8678915  2.792915

accuracy(snaive_dole_train, dole_test)

##                     ME     RMSE      MAE       MPE     MAPE    MASE
## Training set  16033.54  63643.2  36011.5  3.781301 27.34644 1.00000
## Test set     222180.50 224777.8 222180.5 28.712922 28.71292 6.16971
##                   ACF1 Theil's U
## Training set 0.9655291        NA
## Test set     0.6869117  13.15477

accuracy(stlf_dole_train, dole_test)

##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set   428.6905  6534.903  3816.433 0.2205941 3.619427 0.1059782
## Test set     12861.3268 29442.441 22340.560 1.5036966 2.719281 0.6203730
##                     ACF1 Theil's U
## Training set -0.03900888        NA
## Test set      0.69924487  1.692465

#Here RMSE is very high for snaive and ets.It looks better for stlf compared to other two because of transformation.
#Autoplot also justify better forecasting for stlf compared to other two

#a10

autoplot(a10)

a10_train <- subset(a10, end = c(192))
a10_test <- subset(a10, start = c(193))
ets_a10_train <- forecast(ets(a10_train), h = 12)
snaive_a10_train <- snaive(a10_train,  h = 12)
stlf_a10_train <- stlf(a10_train, h = 12, lambda = BoxCox.lambda(a10_train))


autoplot(a10) +
  autolayer(ets_a10_train,series="ETS method", PI=FALSE) +
   autolayer(snaive_a10_train,series="SNAIVE method", PI=FALSE)  +
   autolayer(stlf_a10_train,series="STLF method", PI=FALSE) +
  ggtitle("Monthly anti-diabetic drug sales in Australia from 1991 to 2008.")+
  xlab("Time ")+
  ylab("Sale")

accuracy(ets_a10_train, a10_test)

##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set 0.08142015 0.6834087 0.4666522 0.3611560 4.366536 0.4031628
## Test set     0.23749664 1.9938413 1.6968091 0.8537386 7.596603 1.4659532
##                    ACF1 Theil's U
## Training set -0.1088647        NA
## Test set     -0.3483479 0.6186824

accuracy(snaive_a10_train, a10_test)

##                    ME     RMSE      MAE      MPE     MAPE     MASE
## Training set 1.118474 1.475289 1.157478 10.85668 11.22859 1.000000
## Test set     2.899299 3.901300 3.362144 12.21499 14.68060 2.904715
##                    ACF1 Theil's U
## Training set  0.4420062        NA
## Test set     -0.1958294  1.191566

accuracy(stlf_a10_train, a10_test)

##                      ME      RMSE      MAE        MPE     MAPE      MASE
## Training set 0.02179683 0.5926686 0.406025 -0.1409417 3.874051 0.3507841
## Test set     0.37206467 1.8798559 1.469918  1.2852494 6.785370 1.2699313
##                     ACF1 Theil's U
## Training set -0.06320692        NA
## Test set     -0.43197435  0.596385

#Here RMSE looks good for stlf compared to other two because of transformation.
#Autoplot also justify better forecasting for stlf compared to other two

QUESTION5

For this exercise use data set ukcars, the quarterly UK passenger vehicle production data from 1977Q1-2005Q1.

# Plot the data and describe the main features of the series.
?ukcars
autoplot(ukcars)

ggsubseriesplot(ukcars, main = NULL)

seasonplot(ukcars)

#There is a positive trend in car production until 2000.And there is a sudden decrease in production 2000 till 2003 and then a slight increase in production
#By seeing the ggsubseries,we can understand that production is less in third quarter.

#- Decompose the series using STL and obtain the seasonally adjusted data.
fit.stl.ukcars <- stl(ukcars, s.window = 13, robust = TRUE)

autoplot(fit.stl.ukcars) +  xlab("Time in quarters") + ylab("production counts") + ggtitle("STL Decomposition -Car Production")

ukcars.ssadj <- seasadj(fit.stl.ukcars)

#- Forecast the next two years of the series using an additive damped trend method applied to the seasonally adjusted data. (This can be done in one step using stlf()with arguments etsmodel="AAN", damped=TRUE.)

fit.aan.ukcars <- stlf(ukcars, s.window = 13, robust = TRUE, etsmodel="AAN", damped=TRUE)

fct.aan.ukcars <- forecast(fit.aan.ukcars, h = 8)

autoplot(fct.aan.ukcars) +  xlab("Time in quarters") + ylab("production counts") + ggtitle("AAN FORECAST -Car Production")

#- Forecast the next two years of the series using Holt's linear method applied to the seasonally adjusted data (as before but with damped=FALSE).

fit.hlm.ukcars <- stlf(ukcars, s.window = 13, robust = TRUE, etsmodel="AAN", damped=FALSE)

fct.hlm.ukcars <- forecast(fit.hlm.ukcars, h = 8)

autoplot(fct.hlm.ukcars) +  xlab("Time in quarters") + ylab("production counts") + ggtitle("hlm FORECAST -Car Production")

#- Now use ets() to choose a seasonal model for the data.

fit.ets.ukcars <- ets(ukcars)

fct.ets.ukcars <- forecast(fit.ets.ukcars, h = 8)

fct.ets.ukcars[["model"]]

## ETS(A,N,A) 
## 
## Call:
##  ets(y = ukcars) 
## 
##   Smoothing parameters:
##     alpha = 0.6199 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 314.2568 
##     s = -1.7579 -44.9601 21.1956 25.5223
## 
##   sigma:  25.9302
## 
##      AIC     AICc      BIC 
## 1277.752 1278.819 1296.844

autoplot(fct.ets.ukcars) +  xlab("Time in quarters") + ylab("production counts") + ggtitle("ets FORECAST -Car Production")

#- Compare the RMSE of the ETS model with the RMSE of the models you obtained using STL decompositions. Which gives the better in-sample fits?
#Compare the forecasts from the three approaches? Which seems most reasonable?

accuracy(fit.aan.ukcars)

##                    ME     RMSE     MAE       MPE     MAPE      MASE
## Training set 2.404023 24.67648 19.6208 0.2220182 6.391966 0.6394325
##                    ACF1
## Training set 0.03554102

accuracy(fit.hlm.ukcars)

##                      ME     RMSE     MAE        MPE     MAPE      MASE
## Training set -0.1069739 24.79848 19.2994 -0.6297295 6.362326 0.6289582
##                    ACF1
## Training set 0.02121772

accuracy(fit.ets.ukcars)

##                    ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 1.313884 25.23244 20.17907 -0.1570979 6.629003 0.6576259
##                    ACF1
## Training set 0.02573334

#By seeing the RMSE, the first method, additive damped method gives less error compared to others and hence it fits better compared to other methods
#Forecast of all the three model looks pretty similar.however considering rmse, we could pick additive damped method as a reasonable model

#- Check the residuals of your preferred model.
checkresiduals(fct.aan.ukcars)

## Warning in checkresiduals(fct.aan.ukcars): The fitted degrees of freedom is
## based on the model used for the seasonally adjusted data.

## 
##  Ljung-Box test
## 
## data:  Residuals from STL +  ETS(A,Ad,N)
## Q* = 19.758, df = 3, p-value = 0.0001905
## 
## Model df: 5.   Total lags used: 8

#It looks like residuals vary around zero 
#residual distribution looks to be skewed to right
#there is no correlation as per acf. Overall residual looks better

QUESTION6

For this exercise use data set eggs, the price of a dozen eggs in the United States from 1900-1993. Experiment with the various options in the holt() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each argument is doing to the forecasts. Which model gives the best RMSE?

fct.holt.eggs <- holt(eggs)
fct.holt.eggs[["model"]]

## Holt's method 
## 
## Call:
##  holt(y = eggs) 
## 
##   Smoothing parameters:
##     alpha = 0.8124 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 314.7232 
##     b = -2.7222 
## 
##   sigma:  27.1665
## 
##      AIC     AICc      BIC 
## 1053.755 1054.437 1066.472

autoplot(fct.holt.eggs) + 
  autolayer(fitted(fct.holt.eggs), series = "Fitted") + 
  xlab("Year") + ylab("Dollars") + ggtitle("Forecast using Holt's Method")

fct.holt.damped.eggs <- holt(eggs, damped = TRUE)
fct.holt.damped.eggs[["model"]]

## Damped Holt's method 
## 
## Call:
##  holt(y = eggs, damped = TRUE) 
## 
##   Smoothing parameters:
##     alpha = 0.8462 
##     beta  = 0.004 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 276.9842 
##     b = 4.9966 
## 
##   sigma:  27.2755
## 
##      AIC     AICc      BIC 
## 1055.458 1056.423 1070.718

autoplot(fct.holt.damped.eggs) + 
  autolayer(fitted(fct.holt.damped.eggs), series = "Fitted") + 
  xlab("Year") + ylab("Dollars") + ggtitle("Forecast using Holt's Method")

fct.holt.bxcox.eggs <- holt(eggs, lambda = BoxCox.lambda(eggs))
autoplot(fct.holt.bxcox.eggs) + 
  autolayer(fitted(fct.holt.bxcox.eggs), series = "Fitted") + 
  xlab("Year") + ylab("Dollars") + ggtitle("Forecast using Holt's Method with Box-Cox Transformation")

fct.holt.bxcox.damped.eggs <- holt(eggs, lambda = BoxCox.lambda(eggs), damped = TRUE)
autoplot(fct.holt.bxcox.damped.eggs) + 
  autolayer(fitted(fct.holt.bxcox.damped.eggs), series = "Fitted") + 
  xlab("Year") + ylab("Dollars") + ggtitle("Forecast using Holt's Method with Damped and Box-Cox Transformation")

#By seeing the plots,holt method with box cox transformataion gives better fit
#It has less prediction interval compared to other methods

#Accuracy
accuracy(fct.holt.eggs)

##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.04499087 26.58219 19.18491 -1.142201 9.653791 0.9463626
##                    ACF1
## Training set 0.01348202

accuracy(fct.holt.damped.eggs)

##                     ME     RMSE     MAE       MPE     MAPE      MASE
## Training set -2.891496 26.54019 19.2795 -2.907633 10.01894 0.9510287
##                      ACF1
## Training set -0.003195358

accuracy(fct.holt.bxcox.eggs)

##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.7736844 26.39376 18.96387 -1.072416 9.620095 0.9354593
##                    ACF1
## Training set 0.03887152

accuracy(fct.holt.bxcox.damped.eggs)

##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.8200445 26.53321 19.45654 -2.019718 9.976131 0.9597618
##                     ACF1
## Training set 0.005852382

#rmse is lower for holt method with box cox transformation.

DSCI-FXP-SP19- Assignment #5 Exponential Smoothing Nidhin

Nidhin

7 March 2019

Question1

QUESTION 2

Question3

QUESTION4

QUESTION5

QUESTION6