question 1

Use the ses() function in R to find the optimal values of alpha and level

  • To explore the effect of starting point in the time series, I explore the dataset without the first two years along side the entire dataset. The first two years had large fluctuations in datapoints that were out of line with the dataset as a whole
    • I then plugged the individual datasets into an stl decomposition to see what trends and seasonality behavior existed in each. Seasoanlity seems very constant, and the trend seems rather flat for both.
  • the question wants us to use ses to find the optimal parameters for alpha and l_{0}. Left as null the ses function will optmize these parameters for us. For the shortened dataset the answers are as follows
    • alpha= .3061
    • l_{0} = 87951
  • Assuming we use the entire dataset like we are supposed to, the parameters optimised are as follows:
    • alpha= .2971
    • l_{0} = 77260
  • that seems like a rather large difference in initial level, which makes sense. What is interesting to look at, is how prediction changes. Predictions are stored as means in the model. We wantt o compare the first prediction so model$mean[1]
    • The mean for our entire dataset(1980 forward)= 98816 and the mean for trimmed dataset(1982 forward)= 98807, rather close
pigs <- fma::pigs
tsdisplay(pigs)

## create shorter dataset
pigdata <- window(pigs,start=1982)

## 1982 forward ses model
fc1 <- ses(pigdata, h=4)

## entire dataset model
fc2 <- ses(pigs, h=4)


autoplot(fc2,main="entire dataset") +
  autolayer(fitted(fc2), series="Fitted") +
  ylab("pigs") + xlab("Year")

autoplot(fc1,main="1982 forward") +
  autolayer(fitted(fc1), series="Fitted") +
  ylab("pigs") + xlab("Year")

# fc1$model
# 
# fc2$mean[1]
# fc1$upper[1]


stl_decomposition <- pigdata %>%
  stl(t.window=13, s.window="periodic", robust=TRUE)
stl_decomposition %>% 
    autoplot()+
    ggtitle("stl decomposition
    of pigs 1982-forward")

stl_decomposition <- pigs %>%
  stl(t.window=13, s.window="periodic", robust=TRUE)
stl_decomposition %>% 
    autoplot()+
    ggtitle("stl decomposition
    of pigs entire dataset")

b create own interval and compare to r

  • for this question I will just use the entire dataset
  • we can choose to take the mean given in the model prediction(y_hat). However, I decided to attempt to build the mean with the given formula.
    • y hat is 98816
  • when I take the sd of the residuals it doesn’t equal the sigma returned form the SES function
    • sigma_ses = 10308.58
    • my_sigma = 10273.69
  • therefore when we compute the intervals, they are in fact different which you can see printed below
# build y_hat
alpha <-  .2971
my_mean <- alpha*pigs[188]+(1-alpha)*fc2$fitted[188]

# compare y_hat to models esitmate
round(my_mean,0)==round(fc2$mean[1],0)
## [1] TRUE
fc2$model
## Simple exponential smoothing 
## 
## Call:
##  ses(y = pigs, h = 4) 
## 
##   Smoothing parameters:
##     alpha = 0.2971 
## 
##   Initial states:
##     l = 77260.0561 
## 
##   sigma:  10308.58
## 
##      AIC     AICc      BIC 
## 4462.955 4463.086 4472.665
upper <- my_mean+1.96 *sd(fc2$residuals)
lower <- my_mean-1.96 *sd(fc2$residuals)



ses_upper <- fc2$upper[1]
ses_lower <- fc2$lower[1]
df <- data.frame(cbind(c(upper,lower),c(ses_upper,ses_lower)))
rownames(df) <- c("upper","lower")
colnames(df) <- c("MY_vals","SES_vals")
kable(df,digits =2,'markdown',booktabs =T)
MY_vals SES_vals
upper 118952.73 112027.38
lower 78679.85 85605.43

Question 5 Data set Books

A PLot and discuss

  • daily sales of hard cover and paperback books
autoplot(books,facets=TRUE)

autoplot(books)

  • looks like there is a trend where the sales pick up by the end of the month
  • the daily seasonality doesn’t look constant
  • lets seperate the columns and explore

B forecast and plot with ses

  • predicted value for hard covered = 239.56
  • predicted value for paper back = 207.19
books <- data.frame(books)
Hardcover <- ts(books$Hardcover)
#autoplot(Hardcover)
paperback <- ts(books$Paperback)

fc11 <- ses(Hardcover,h=4)
fc22 <- ses(paperback,h=4)

plot_grid(
autoplot(fc11) +
  autolayer(fitted(fc11), series="Fitted") +
  ylab("hard cover") + xlab("Year"),
autoplot(fc22) +
  autolayer(fitted(fc22), series="Fitted") +
  ylab("Paperback") + xlab("Year"),ncol=1
)

fc11$mean
## Time Series:
## Start = 31 
## End = 34 
## Frequency = 1 
## [1] 239.5601 239.5601 239.5601 239.5601
fc22$mean
## Time Series:
## Start = 31 
## End = 34 
## Frequency = 1 
## [1] 207.1097 207.1097 207.1097 207.1097

C Compute rmse

  • 31.93 and 33.64
round(forecast::accuracy(fc11),2)[2]
## [1] 31.93
round(forecast::accuracy(fc22),2)[2]
## [1] 33.64

Question 6 Now use Holts

A get forecasts

  • hardcover = 250.1739 253.4765 256.7791 260.0817
  • softcover = 209.4668 210.7177 211.9687 213.2197
holt1 <- holt(Hardcover, h=4)
holt1$mean
## Time Series:
## Start = 31 
## End = 34 
## Frequency = 1 
## [1] 250.1739 253.4765 256.7791 260.0817
holt2 <- holt(paperback, h=4)
holt2$mean
## Time Series:
## Start = 31 
## End = 34 
## Frequency = 1 
## [1] 209.4668 210.7177 211.9687 213.2197

B Get RMSE compare to ses

  • hardcover - 27.19 versus 31.93
  • softcover - 31.14 versus 33.64
round(forecast::accuracy(holt1),2)
##                 ME  RMSE   MAE   MPE  MAPE MASE  ACF1
## Training set -0.14 27.19 23.16 -2.11 12.16 0.69 -0.03
round(forecast::accuracy(holt2),2)
##                 ME  RMSE   MAE   MPE  MAPE MASE  ACF1
## Training set -3.72 31.14 26.18 -5.51 15.58 0.66 -0.18

C compare holts to ses

  • below i plot the smoothing curves together and then i map out predictions
  • Looking at hardcover it seems liekly that the holts method does a better job. It could be because there is a trend in the hardcover data
  • Looking at the paperback, its difficult to discern which method will be a better prediction
autoplot(holt1) +
  autolayer(fitted(holt1), series="ses") +
    autolayer(fitted(fc11), series="holts")+
  ylab("hard cover") + xlab("Year")

autoplot(holt2) +
  autolayer(fitted(holt2), series="Fitted") +
    autolayer(fitted(fc22), series="holts")+
  ylab("Paperback") + xlab("Year")

round(forecast::accuracy(holt1),2)
##                 ME  RMSE   MAE   MPE  MAPE MASE  ACF1
## Training set -0.14 27.19 23.16 -2.11 12.16 0.69 -0.03

D

  • Using rmse the interval is different than taking the estimated intervals from holt
    • I calculated the rmse by taking the 1st predictied value +- rmse * 1.96
library(forecast)
upper_hard_holt <- holt1$mean[1] + round(forecast::accuracy(fc11)[2],2)*1.96
lower_hard_holt <-  holt1$mean[1] - round(forecast::accuracy(fc11)[2],2)*1.96
upper_paper_holt <-  holt2$mean[1] + round(forecast::accuracy(fc22)[2],2)*1.96
lower_paper_holt <-  holt2$mean[1] - round(forecast::accuracy(fc22)[2],2)*1.96


books_holt_w_rmse <- c(upper_hard_holt,lower_hard_holt,upper_paper_holt,lower_paper_holt)
books_ses <- c(fc11$upper[5],fc11$lower[5],fc22$upper[5],fc22$lower[5])
books_holt <-  c(holt1$upper[5],holt1$lower[5],holt2$upper[5],holt2$lower[5])
summary_results <- data.frame(rbind(books_holt_w_rmse,books_ses,books_holt))
colnames(summary_results) <- c("hard_upper","hard_lower","paperback_upper","paperback_lower")
kable(summary_results,digits =2,'markdown',booktabs =T)
hard_upper hard_lower paperback_upper paperback_lower
books_holt_w_rmse 312.76 187.59 275.40 143.53
books_ses 304.34 174.78 275.35 138.87
books_holt 307.43 192.92 275.02 143.91
## comparing rmse using tscv
e1 <- tsCV(Hardcover, ses, h=1)
e2 <- tsCV(Hardcover, holt, h=1)
e3 <- tsCV(Hardcover, holt,damped=TRUE, h=1) 
mean(e1^2, na.rm=TRUE)
## [1] 1168.501
mean(e2^2, na.rm=TRUE)
## [1] 1194.264
mean(e3^2, na.rm=TRUE)
## [1] 1186.913

Question 7 eggs

  • eggs is yearly data
  • After plotting lambda transformed side by side with original, Lambda transformed doesn’t look all that much better
  • clear downward trend in data with large cyclical beahvior
  • I graph the prediction intervals for
    • holt w damped, holt, lambda-transfomred holt, lambda-transformed and damped holt
    • non boxcox transformed data starts predicting negative 95% intervals within h=2
  • The rmse and MAE in the training set appear to also confirm that the box cox transformed model is the best model
  • The takeaway here is that due to the large downward trend, holt is incapable of limiting predictions to a range of real possible outcomes(price >0). Boxcox forces non-negative values.
autoplot(eggs)

lambda <- BoxCox.lambda(eggs)
#autoplot(BoxCox(myts,lambda))

plot_grid(autoplot(BoxCox(eggs,lambda)),autoplot(eggs),labels = c('Lambda trans', 'original'))

fit1 <- holt(eggs ,damped=TRUE,h=100)
fit2 <- holt(eggs, h=100)
fit3 <- holt(eggs, h=100,lambda =lambda )
fit4 <- holt(eggs, h=100,lambda =lambda,damped=TRUE )

autoplot(holt(eggs, h=100)) +
  autolayer(fit1, series="damped") +
    autolayer(fit2, series="holts")+
    autolayer(fit3, series="boxcox")+
    autolayer(fit4, series="boxcox_damped")+
  ylab("Paperback") + xlab("Year")

holt_damped <- forecast::accuracy(fit1)[2]
holt_simple <- forecast::accuracy(fit2)[2]
box_cox_holt <- forecast::accuracy(fit3)[2]
box_cox_damped_holt <- forecast::accuracy(fit4)[2]

test_results <- data.frame(rbind(holt_damped,holt_simple,box_cox_holt,box_cox_damped_holt))
colnames(test_results) <- "rmse"
kable(test_results,digits =2,'markdown',booktabs =T)
rmse
holt_damped 26.54
holt_simple 26.58
box_cox_holt 26.39
box_cox_damped_holt 26.53
#forecast()



a <- data.frame(fit1)
b <- data.frame(fit2)
#

Question 8 retail time series data

A Why is multiplicative seasonality necessary for this series?

  • You can see from the autoplot below that the seasonality isnt constant and therefore we need to diagnose it as multiplicative

B Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

  • below I graph damped and multiplicative holts winter
  • It’s hard to tell from the graphs what the exact values are of the predictions. Therefore I created a boolean value by askign where (hw_mult - hw_mult_damped > 0). Only the first observation in prediction had damped being higher. So damped clearly dampens the multiplicative nature of the dataset
  • I also plotted the entire 5th year prediction of hw_mult and Hw_mult_damped. You can see for all indexes damped is lower
retaildata <- readxl::read_excel("retail.xlsx", skip=1)
myts <- ts(retaildata[,15],
  frequency=12, start=c(1982,4))
autoplot(myts)

fit1 <- hw(myts,seasonal="multiplicative",h=60)
fit2 <- hw(myts,seasonal="additive",h=60)
fit3 <- hw(myts,seasonal="multiplicative",damped = TRUE ,h=60)

year_5_mult <- ts(fit1$mean[48:60])
year_5_mult_damped <- ts(fit3$mean[48:60])
plot1 <- autoplot(myts) +
  autolayer(fit1, series="HW multiplicative forecasts 5 years out", PI=FALSE) 
#    autolayer(fit2, series="HW additive forecasts 5 years out", PI=FALSE)+
plot2 <-  autoplot(myts) +
    autolayer(fit3, series="HW damped multiplicative forecasts 5 years out", PI=FALSE)
plot_grid(plot1,plot2,ncol=1)

fit1 <- hw(myts,seasonal="multiplicative",h=120)
fit2 <- hw(myts,seasonal="additive",h=120)
fit3 <- hw(myts,seasonal="multiplicative",damped = TRUE ,h=120)

plot1 <- autoplot(myts) +
  autolayer(fit1, series="HW multiplicative forecasts 10 years out", PI=FALSE) 
#    autolayer(fit2, series="HW additive forecasts 5 years out", PI=FALSE)+
plot2 <-  autoplot(myts) +
    autolayer(fit3, series="HW damped multiplicative forecasts 10 years out", PI=FALSE)
plot_grid(plot1,plot2,ncol=1)

year_10_predictions <- ts(fit1$mean[108:120])
my_data <- data.frame(cbind(year_5_mult,year_5_mult_damped))

ggplot(data = my_data, mapping = aes(x =as.numeric(rownames(my_data)))) +
  geom_point(aes(y = year_5_mult), shape = "triangle",col="red") +
  geom_point(aes(y = year_5_mult_damped), shape = "square",col="blue")

## Boolean test hw vs hwdamped
fit1$mean-fit3$mean>0
##        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov
## 2014 FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## 2015  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## 2016  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## 2017  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## 2018  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## 2019  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## 2020  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## 2021  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## 2022  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## 2023  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##        Dec
## 2014  TRUE
## 2015  TRUE
## 2016  TRUE
## 2017  TRUE
## 2018  TRUE
## 2019  TRUE
## 2020  TRUE
## 2021  TRUE
## 2022  TRUE
## 2023  TRUE

C Compare accuracy of one step forecasts

  • hw_mult = 4.51
  • hw_damped_mulit= 4.56

D Check that the residuals from the best method look like white noise

  • residuals for each are plotted below
  • they seem normally distributed and appear to look mostly like white noise. However, when plotting the acf of the residuals, it appears there is still some autocorrelation occuring
  • The P values from the Ljung_Box Test indicate that there is autocorrelation
round(forecast::accuracy(fit1),2)
##                 ME RMSE MAE   MPE MAPE MASE ACF1
## Training set -0.09 4.51 3.2 -0.74 4.92 0.58 0.09
round(forecast::accuracy(fit3),2)
##                ME RMSE  MAE  MPE MAPE MASE ACF1
## Training set 0.48 4.56 3.22 0.41 4.91 0.58 0.07
par(mfrow=c(2,2))
plot(myts, fit1$residuals, 
ylab="Residuals", xlab="quantitiy", 
 main="residuals plotted against data hw_mult")
plot(myts, fit3$residuals, 
ylab="Residuals", xlab="quantitiy", 
 main="residuals plotted against data damped_hw_mult")


plot(fit1$residuals,main="residuals for hw_mult  ")
plot(fit3$residuals,main="residuals for hw_mult_damped  ")

checkresiduals(fit1)

## 
##  Ljung-Box test
## 
## data:  Residuals from Holt-Winters' multiplicative method
## Q* = 72.086, df = 8, p-value = 1.887e-12
## 
## Model df: 16.   Total lags used: 24
checkresiduals(fit3)

## 
##  Ljung-Box test
## 
## data:  Residuals from Damped Holt-Winters' multiplicative method
## Q* = 75.911, df = 7, p-value = 9.37e-14
## 
## Model df: 17.   Total lags used: 24
# ##plot hw residuals
# par(mfrow=c(2,2))
# plot(density(fit1$residuals),main="residuals normality test hw_mult") #A density plot
# qqnorm(fit1$residuals,main="qq test for hw_multi") # A quantile normal plot - good for checking normality
# qqline(fit1$residuals)
# par(mfrow=c(2,2))
# 
# ##plot hw_damped residuals
# plot(density(fit3$residuals),main="residuals normality test hw_mult_damped") #A density plot
# qqnorm(fit3$residuals,main="qq test for hw_multi_damped") # A quantile normal plot - good for checking normality
# qqline(fit3$residuals)
# 
# ggAcf(fit1$residuals) + ggtitle("ACF of residuals")
# ggAcf(fit3$residuals) + ggtitle("ACF of residuals")

E Now find the test set RMSE, while training the model to the end of 2010

  • Multiplicative rmse performs the best
## make split and test with multiplicative
train <- head(myts, 345)
test <- window(myts,start=2011)
fit_1 <- hw(train,seasonal="multiplicative",h=36)
predicted <- fit_1$mean
actual <- test
mult_rmse <- rmse(actual, predicted)

## test with multiplicative damped
fit_2 <- hw(train,seasonal="multiplicative",damped = TRUE,h=36)
predicted <- fit_2$mean
actual <- test
damped_mult_rmse <- rmse(actual, predicted)

## Test with snaive method
fc <- snaive(train,h=36)
#accuracy(fc,myts.test)
snaive_rmse <- rmse(actual, fc$mean)
df <- data.frame(rbind(snaive_rmse,damped_mult_rmse,mult_rmse))
colnames(df) <- "rmse"
kable(df,digits =2,'markdown',booktabs =T)
rmse
snaive_rmse 14.86
damped_mult_rmse 12.33
mult_rmse 10.87

Question 9 For the same retail data, try an STL decomposition applied to the Box-Cox transformed series,

  • lambda= -.949
  • it appears the rmse of 28.9 is worse than the models before. Perhaps I am doing soemthing wrong plugging in the ets object. I attmepted to extract seasonal and ran a forcasted_2 prediction and that rmse was poor as well.
  • I attempted to extract the seasonal adjustment w boxcox and without and received rmse of 36 and 19
  • All of the models are plotted with predictions in end of this section
lambda <- BoxCox.lambda(test)
##boxcox convert
test_boxcox <- BoxCox(test, lambda)
train_boxcox <- BoxCox(train, lambda)
##plug into stl
stl_decomposition <- stl(train_boxcox[, 1], s.window = "periodic")
stl_decomposition2 <- stl_decomposition %>% seasadj()

##plug in box cox convert
 
forcasted <- forecast(stl_decomposition,h=36,biasadj = TRUE)

forcasted2 <- forecast(stl_decomposition2,h=36,biasadj = TRUE)
predicted <- forcasted$mean
predicted2 <- forcasted2$mean

## back transform 
invBoxCox <- function(x, lambda) {
    if (lambda == 0) {
        exp(x)} 
    else {(lambda*x + 1)^(1/lambda)} 
}


predicted  <- invBoxCox(predicted,lambda)
predicted2 <- invBoxCox(predicted2,lambda)

## get Rmse
actual <- test
rmse(actual, predicted)
## [1] 28.907
rmse(actual,predicted2)
## [1] 36.39676
## Seasonally adjusted without boxcox
stl_decomposition2 <- stl(train[, 1], s.window = "periodic")
stl_decomposition2 <- stl_decomposition2 %>% seasadj()
forcasted3 <- forecast(stl_decomposition2,h=36)

predicted <- forcasted3$mean
actual <- test
rmse(actual, predicted)
## [1] 19.92006
# decompose_air = decompose(train_boxcox, "multiplicative")
# adjust_air = train_boxcox / decompose_air$seasonal
# plot(adjust_air, lwd = 2)
# adjust_air
# stl_decomposition
plot(forcasted,main="box_cox")

plot(forcasted2,main="box_cox_w_seasonal_adj")

plot(forcasted3,main="seasonal_adj,withou boxcox")