This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
##      speed           dist
##  Min.   : 4.0   Min.   :  2.00
##  1st Qu.:12.0   1st Qu.: 26.00
##  Median :15.0   Median : 36.00
##  Mean   :15.4   Mean   : 42.98
##  3rd Qu.:19.0   3rd Qu.: 56.00
##  Max.   :25.0   Max.   :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
library(fpp2)
## Loading required package: ggplot2
## Loading required package: forecast
## Loading required package: fma
## Loading required package: expsmooth
library(seasonal)
##1.a
spigs<- ses(pigs,h=4)
spigs$model
## Simple exponential smoothing
##
## Call:
## ses(y = pigs, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
#1.b
# 95% prediction interval for the first forecast
spigs$upper[1, "95%"]
## 95%
## 119020.8
spigs$lower[1, "95%"]
## 95%
## 78611.97
# calculate 95% prediction interval using formula
s <- sd(spigs$residuals)
spigs$mean[1] + 1.96*s
## [1] 118952.8
spigs$mean[1] - 1.96*s
## [1] 78679.97
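# Added cross-check (not in the original): ses() builds its interval from the
# model's estimated sigma (shown as "sigma" in the model printout) rather than
# the raw sd of the residuals, which explains the small gap above. This assumes
# the fitted ets model stores that value as $sigma2.
s_model <- sqrt(spigs$model$sigma2)
spigs$mean[1] + 1.96*s_model
spigs$mean[1] - 1.96*s_model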
#plot the data
autoplot(spigs) +
autolayer(spigs$fitted)
# Simple Exponential Smoothing function with first 10 terms
SES10<-function(ts,alpha,l_0){
N<-length(ts)
y_hat<-alpha*(ts[N])
for (i in 1:10) {
y_hat<-y_hat + alpha*((1-alpha)^i)*ts[N-i]
}
y_hat2<-y_hat+(1-alpha)^11*l_0
return(y_hat2)
}
#SES Function with first 20 terms
SES20<-function(ts,alpha,l_0){
N<-length(ts)
y_hat<-alpha*(ts[N])
for (i in 1:20) {
y_hat<-y_hat + alpha*((1-alpha)^i)*ts[N-i]
}
y_hat2<-y_hat+(1-alpha)^21*l_0
return(y_hat2)
}
SES10(pigs,0.2971,77258)
## [1] 98315.32
SES20(pigs,0.2971,77258)
## [1] 98803.69
# Results are close to those from the ses() function.
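# Sketch (added): the same weighted-average form taken over all observations,
# using the model's fitted alpha = 0.2971 and l_0 = 77260.0561, should
# reproduce spigs$mean[1] almost exactly.
SESall<-function(ts,alpha,l_0){
  N<-length(ts)
  # weight alpha*(1-alpha)^j for the j-th most recent observation, j = 0..N-1
  y_hat<-sum(alpha*(1-alpha)^(0:(N-1))*rev(ts))
  y_hat + (1-alpha)^N*l_0
}
SESall(pigs,0.2971,77260.0561)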
RSSFunction<-function(alpha){
error<-rep(0,167)
for (i in 21:187){
error[i]<-SES20(pigs[1:i],alpha,77000)-pigs[i+1]
}
RSS<-sum(error^2)
return(RSS)
}
RSSFunction(0.3)
## [1] 14125046607
optim(0.5,RSSFunction, method="Brent", lower =0.2, upper = 0.8)
## $par
## [1] 0.2884495
##
## $value
## [1] 14120733938
##
## $counts
## function gradient
## NA NA
##
## $convergence
## [1] 0
##
## $message
## NULL
# optim() gives alpha = 0.288, while ses() estimated alpha = 0.297 as the value with the smallest RSS.
## The exact source of the difference is currently unknown; one possibility is explored below.
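# Sketch (added; an assumption about the likely cause, not what ses() literally
# does): ses() estimates alpha and the initial level l_0 jointly, so optimising
# both over the one-step-ahead SSE of the whole series should land closer to
# the reported 0.2971.
sse_ses<-function(par, y){
  alpha<-par[1]; level<-par[2]
  sse<-0
  for (t in seq_along(y)) {
    e<-y[t]-level          # one-step-ahead forecast error
    sse<-sse+e^2
    level<-level+alpha*e   # simple exponential smoothing level update
  }
  sse
}
optim(c(0.5, pigs[1]), sse_ses, y = pigs)$par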
#a.
autoplot(books)
##The sales of both paperback and hardcover books increased over time. There is no obvious seasonal pattern or cycle.
#b.
spaperback <- ses(books[, "Paperback"], h = 4)
shardcover <- ses(books[, "Hardcover"], h = 4)
autoplot(books[, "Paperback"], series = "Paperback") +
autolayer(spaperback, series = "Paperback") +
autolayer(books[, "Hardcover"], series = "Hardcover") +
autolayer(shardcover, series = "Hardcover", PI = FALSE) +
ylab("Sales amount") +
ggtitle("Sales of paperback and hardcover books")
#c
sqrt(mean(spaperback$residuals^2))
## [1] 33.63769
sqrt(mean(shardcover$residuals^2))
## [1] 31.93101
# The RMSE values show that the residual variance of hardcover sales was smaller than that of paperback sales.
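# Added cross-check: accuracy() reports the same training RMSE values directly.
accuracy(spaperback)[, "RMSE"]
accuracy(shardcover)[, "RMSE"]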
#a.
hpaperback <- holt(books[, "Paperback"], h = 4)
hhardcover <- holt(books[, "Hardcover"], h = 4)
autoplot(books[, "Paperback"]) +
autolayer(hpaperback)
autoplot(books[, "Hardcover"]) +
autolayer(hhardcover)
#b.
spaperback <- sqrt(mean(hpaperback$residuals^2))
shardcover <- sqrt(mean(hhardcover$residuals^2))
spaperback
## [1] 31.13692
shardcover
## [1] 27.19358
##The RMSE values are all lower when using Holt's method.
#C.
##The forecasts of hardcover sales were better than those of paperback sales, given the lower RMSE.
#d.
writeLines("95% PI of paperback sales calculated by holt function")
## 95% PI of paperback sales calculated by holt function
hpaperback$upper[1, "95%"]
## 95%
## 275.0205
hpaperback$lower[1, "95%"]
## 95%
## 143.913
writeLines("95% PI of paperback sales calculated by formula")
## 95% PI of paperback sales calculated by formula
hpaperback$mean[1] + 1.96*spaperback
## [1] 270.4951
hpaperback$mean[1] - 1.96*spaperback
## [1] 148.4384
writeLines("95% PI of hardcover sales calculated by holt function")
## 95% PI of hardcover sales calculated by holt function
hhardcover$upper[1, "95%"]
## 95%
## 307.4256
hhardcover$lower[1, "95%"]
## 95%
## 192.9222
writeLines("95% PI of hardcover sales calculated by formula")
## 95% PI of hardcover sales calculated by formula
hhardcover$mean[1] + 1.96*shardcover
## [1] 303.4733
hhardcover$mean[1] - 1.96*shardcover
## [1] 196.8745
## The prediction interval for the first forecast of each series is almost the same whether it is calculated by the holt function or by the formula.
## In the ses case, the PI differed between the ses function and the formula.
str(eggs)
## Time-Series [1:94] from 1900 to 1993: 277 315 315 321 315 ...
head(eggs)
## Time Series:
## Start = 1900
## End = 1905
## Frequency = 1
## [1] 276.79 315.42 314.87 321.25 314.54 317.92
autoplot(eggs)
# Holt function with damped option.
holt_damped_eggs <- holt(eggs, damped = TRUE, h = 100)
autoplot(holt_damped_eggs) +
autolayer(holt_damped_eggs$fitted)
# Holt function with Box-Cox transformation.
holt_BoxCox_eggs <- holt(eggs,
lambda = BoxCox.lambda(eggs),
h = 100)
autoplot(holt_BoxCox_eggs) +
autolayer(holt_BoxCox_eggs$fitted)
# Use holt function with Box-Cox transformation and damped option.
holt_BoxCox_damped_eggs <- holt(
eggs,
damped = TRUE,
lambda = BoxCox.lambda(eggs),
h = 100)
autoplot(holt_BoxCox_damped_eggs) +
autolayer(holt_BoxCox_damped_eggs$fitted)
# show RMSE values for each model
writeLines("RMSE when using holt function with damped option")
## RMSE when using holt function with damped option
sqrt(mean(holt_damped_eggs$residuals^2))
## [1] 26.54019
writeLines("RMSE when using holt function with Box-Cox transformation")
## RMSE when using holt function with Box-Cox transformation
sqrt(mean(holt_BoxCox_eggs$residuals^2))
## [1] 1.032217
writeLines("RMSE when using holt function with damped option and Box-Cox transformation")
## RMSE when using holt function with damped option and Box-Cox transformation
sqrt(mean(holt_BoxCox_damped_eggs$residuals^2))
## [1] 1.039187
##The best model is Holt's linear method with the Box-Cox transformation.
#load data
retail <- readxl::read_excel("Desktop/retail.xlsx", skip=1)
## readxl works best with a newer version of the tibble package.
## You currently have tibble v1.4.2.
## Falling back to column name repair from tibble <= v1.4.2.
## Message displays once per session.
tsretail <- ts(retail[, "A3349873A"],
frequency = 12,
start = c(1982, 4))
#a.
autoplot(tsretail)
#b.
etsAAM <- hw(tsretail,
seasonal = "multiplicative")
etsAAdM <- hw(tsretail,
seasonal = "multiplicative",
damped = TRUE)
autoplot(etsAAM)
autoplot(etsAAdM)
#c.
errorAAM <- tsCV(tsretail,
hw, h = 1, seasonal = "multiplicative"
)
errorAAdM <- tsCV(tsretail,
hw, h = 1, seasonal = "multiplicative", damped = TRUE
)
sqrt(mean(errorAAM^2, na.rm = TRUE))
## [1] 14.72762
sqrt(mean(errorAAdM^2, na.rm = TRUE))
## [1] 14.94306
##The tsCV RMSE values are close; I prefer the damped model.
#d.
checkresiduals(etsAAdM)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' multiplicative method
## Q* = 42.932, df = 7, p-value = 3.437e-07
##
## Model df: 17. Total lags used: 24
## The residuals from the chosen method do not look like white noise; the Ljung-Box test rejects white noise.
#e.
tstrain <- window(tsretail,
end = c(2010, 12))
tstest <- window(tsretail,
start = 2011)
## Holt-Winters' method with damped option.
etstrain <- hw(tstrain,
h = 36,
seasonal = "multiplicative",
damped = TRUE)
autoplot(etstrain)
accuracy(etstrain, tstest)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4556121 8.681456 6.24903 0.2040939 3.151257 0.3916228
## Test set 94.7346169 111.911266 94.73462 24.2839784 24.283978 5.9369594
## ACF1 Theil's U
## Training set -0.01331859 NA
## Test set 0.60960299 1.90013
## Cannot beat seasonal naive approach.
## Holt-Winters' method.
etstrain <- hw(tstrain,
h = 36,
seasonal = "multiplicative")
autoplot(etstrain)
accuracy(etstrain, tstest)
## ME RMSE MAE MPE MAPE
## Training set 0.03021223 9.107356 6.553533 0.001995484 3.293399
## Test set 78.34068365 94.806617 78.340684 19.945024968 19.945025
## MASE ACF1 Theil's U
## Training set 0.4107058 0.02752875 NA
## Test set 4.9095618 0.52802701 1.613903
##Still cannot beat the seasonal naive approach
library(fpp2)
# ggtsdisplay shows the time plot along with the ACF and PACF, i.e. the correlation at different lags.
ggtsdisplay(ibmclose)
#a.
x1<- BoxCox.lambda(usnetelec)
x1
## [1] 0.5167714
usneBC<- BoxCox(usnetelec,x1)
tsdisplay(usneBC)
##The ACF and PACF tell us that one difference would be enough.
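# Added cross-check: ndiffs() runs a unit-root test and should agree that a
# single difference is enough for the transformed series.
ndiffs(usneBC)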
#b.
x2<- BoxCox.lambda(usgdp)
x2
## [1] 0.366352
usgdpBC<- BoxCox(usgdp,x2)
plot(usgdpBC)
ggAcf(usgdpBC)
ggPacf(usgdpBC)
##The ACF and PACF tell us that one difference would be enough.
#c.
x3<- BoxCox.lambda(mcopper)
x3
## [1] 0.1919047
mcopperBC<- BoxCox(mcopper,x3)
tsdisplay(mcopperBC)
##The PACF tells us that two differences would be needed.
#d.
x4<- BoxCox.lambda(enplanements)
x4
## [1] -0.2269461
enplanementsBC<- BoxCox(enplanements,x4)
tsdisplay(enplanementsBC)
##The complicated PACF tells us that a seasonal difference would be needed.
#e.
x5<- BoxCox.lambda(visitors)
x5
## [1] 0.2775249
visitorsBC<- BoxCox(visitors,x5)
tsdisplay(visitorsBC)
##The PACF tells us that a seasonal difference would be needed.
## A seasonal difference is needed: $(1 - B^{12})y_t = y'_t$
retaildata <- readxl::read_excel("/Users/chiben/Desktop/retail.xlsx", skip=1)
retail.ts <- ts(retaildata[,"A3349873A"], frequency=12, start=c(1982,4))
x6 <- BoxCox.lambda(retail.ts)
x6
## [1] 0.1276369
retail.BoxCox<-BoxCox(retail.ts,x6)
tsdisplay(retail.BoxCox)
##The PACF tells us that a seasonal difference would be needed.
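# Sketch (added): the seasonal difference (1 - B^12) can be applied with
# diff(lag = 12); ndiffs() then indicates whether a further first difference
# is needed. The object name retail.sdiff is my own addition.
retail.sdiff<-diff(retail.BoxCox, lag = 12)
tsdisplay(retail.sdiff)
ndiffs(retail.sdiff)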
#a.
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
#b.
ar1generator <- function(phi1){
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100){
y[i] <- phi1*y[i-1] + e[i]
}
return(y)
}
autoplot(ar1generator(0.3), series = "0.3") +
geom_line(size = 1, colour = "red") +
autolayer(y, series = "0.6", size = 1) +
autolayer(ar1generator(0.9), size = 1, series = "0.9") +
ylab("AR(1) models") +
guides(colour = guide_legend(title = "Phi1"))
##As phi increases, the variation of y increases.
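# Added cross-check: arima.sim() from base R generates the same kind of AR(1)
# series and can be used to sanity-check the hand-rolled generator.
autoplot(arima.sim(model = list(ar = 0.9), n = 100))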
#c.
ma1generator <- function(theta1){
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100){
y[i] <- theta1*e[i-1] + e[i]
}
return(y)
}
#d.
autoplot(ma1generator(0.3), series = "0.3") +
geom_line(size = 1, colour = "red") +
autolayer(y, series = "0.6", size = 1) +
autolayer(ar1generator(0.9), size = 1, series = "0.9") +
ylab("MA(1) models") +
guides(colour = guide_legend(title = "Theta1"))
##As theta increases, the variation of y increases.
#e.
y_arima.1.0.1 <- ts(numeric(50))
e <- rnorm(50)
for(i in 2:50){
y_arima.1.0.1[i] <- 0.6*y_arima.1.0.1[i-1] + 0.6*e[i-1] + e[i]
}
#f.
y_arima.2.0.0 <- ts(numeric(50))
e <- rnorm(50)
for(i in 3:50){
y_arima.2.0.0[i] <- -0.8*y_arima.2.0.0[i-1] + 0.3*y_arima.2.0.0[i-2] + e[i]
}
#g.
autoplot(y_arima.1.0.1, series = "ARMA(1, 1)") +
autolayer(y_arima.2.0.0, series = "AR(2)") +
ylab("y") +
guides(colour = guide_legend(title = "Models"))
autoplot(y_arima.1.0.1)
##The AR(2) data are non-stationary; the ARMA(1,1) data are stationary.
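# Added check: the AR(2) characteristic polynomial 1 + 0.8B - 0.3B^2 has a
# root with modulus less than 1, so the process is non-stationary
# (stationarity requires all roots to lie outside the unit circle).
Mod(polyroot(c(1, 0.8, -0.3)))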
#a.
tsdisplay(wmurders)
##The ACF and PACF tell us that differencing is needed.
wmurder1<- diff(wmurders)
tsdisplay(wmurder1)
##The ACF and PACF suggest ARIMA(2,1,0) or ARIMA(0,1,2).
m1<- arima(wmurders,order=c(0,1,2))
m2<- arima(wmurders,order=c(2,1,0))
accuracy(m1)
## ME RMSE MAE MPE MAPE
## Training set 0.0007242355 0.1997392 0.1543531 -0.08224024 4.434684
## MASE ACF1
## Training set 0.9491994 0.005880608
accuracy(m2)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001232357 0.2008046 0.1544929 -0.06022595 4.436949 0.950059
## ACF1
## Training set 0.005925284
##The training-set accuracy measures are close, so either model may work.
#b.
##Including a constant in a twice-differenced model would produce a quadratic trend, which is dangerous for forecasting.
#c.
##ARIMA(2,1,0): $(1 - \phi_1 B - \phi_2 B^2)(1 - B)y_t = \epsilon_t$
##ARIMA(0,1,2): $(1 - B)y_t = (1 + \theta_1 B + \theta_2 B^2)\epsilon_t$
#d.
plot(residuals(m1))
##There is no obvious pattern in the residual plot; the residuals look like white noise, so the model seems satisfactory.
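# Added: a formal check of the residuals with a Ljung-Box test complements the
# visual inspection of the residual plot.
checkresiduals(m1)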
#f.
fc1<- forecast(m1)
plot(fc1)
#g.
m3<- auto.arima(wmurders)
m3
## Series: wmurders
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## -0.2434 -0.8261
## s.e. 0.1553 0.1143
##
## sigma^2 estimated as 0.04632: log likelihood=6.44
## AIC=-6.88 AICc=-6.39 BIC=-0.97
##auto.arima chooses (1,2,1)
accuracy(m1)
## ME RMSE MAE MPE MAPE
## Training set 0.0007242355 0.1997392 0.1543531 -0.08224024 4.434684
## MASE ACF1
## Training set 0.9491994 0.005880608
accuracy(m2)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.001232357 0.2008046 0.1544929 -0.06022595 4.436949 0.950059
## ACF1
## Training set 0.005925284
accuracy(m3)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01065956 0.2072523 0.1528734 -0.2149476 4.335214 0.9400996
## ACF1
## Training set 0.02176343
##The performance of the three models is close.
#a.
austa.arima<-auto.arima(austa)
austa.arima
## Series: austa
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## 0.3006 0.1735
## s.e. 0.1647 0.0390
##
## sigma^2 estimated as 0.03376: log likelihood=10.62
## AIC=-15.24 AICc=-14.46 BIC=-10.57
##auto.arima chose an ARIMA(0,1,1) model with drift = 0.17.
autoplot(residuals(austa.arima))
##Yes, the residuals look like white noise.
fcausta1<-forecast(austa.arima,h=10)
autoplot(fcausta1)
#b.
austa.arima2<-Arima(austa,order=c(0,1,1), include.drift=FALSE)
austa.arima3<-Arima(austa,order=c(0,1,0), include.drift=FALSE)
fcausta2<-forecast(austa.arima2, h=10)
fcausta3<-forecast(austa.arima3, h=10)
autoplot(fcausta2)
autoplot(fcausta3)
##The forecast results look much the same; removing the MA term makes the prediction intervals narrower.
#c.
fc_austa_arima.2.1.3.drift <- forecast(
Arima(austa, order = c(2, 1, 3), include.drift = TRUE),
h = 10
)
autoplot(fc_austa_arima.2.1.3.drift)
##The forecasts are increasing, but the speed of the increase is decreasing.
drift_austa <- fc_austa_arima.2.1.3.drift$model$coef[6]
fc_austa_arima.2.1.3.nodrift <- fc_austa_arima.2.1.3.drift$mean - drift_austa*seq_len(10)
autoplot(fc_austa_arima.2.1.3.drift) +
autolayer(fc_austa_arima.2.1.3.nodrift)
##Without the drift constant, the forecasts look implausible.
#d.
fc_austa_arima.0.0.1.const <- forecast(
Arima(
austa, order = c(0, 0, 1), include.constant = TRUE
),
h = 10
)
autoplot(fc_austa_arima.0.0.1.const)
##The forecasts quickly revert to the historical mean of the data.
fc_austa_arima.0.0.0.const <- forecast(
Arima(austa, order = c(0, 0, 0), include.constant = TRUE),
h = 10
)
autoplot(fc_austa_arima.0.0.0.const)
##All of the forecasts equal the historical mean of the data, much like the mean method.
#e.
fc_austa_arima.0.2.1 <- forecast(
Arima(austa, order = c(0, 2, 1)),
h = 10
)
autoplot(fc_austa_arima.0.2.1)
# The forecasts show an increasing trend, and the prediction intervals widen the further ahead we forecast.
#a.
x1 <- BoxCox.lambda(usgdp)
x1
## [1] 0.366352
usgdpBC<-BoxCox(usgdp,x1)
#b.
usgdp.arima<- auto.arima(usgdpBC)
usgdp.arima
## Series: usgdpBC
## ARIMA(2,1,0) with drift
##
## Coefficients:
## ar1 ar2 drift
## 0.2795 0.1208 0.1829
## s.e. 0.0647 0.0648 0.0202
##
## sigma^2 estimated as 0.03518: log likelihood=61.56
## AIC=-115.11 AICc=-114.94 BIC=-101.26
#c.
usgdp.arima2<-Arima(usgdpBC,order=c(1,1,1))
usgdp.arima3<-Arima(usgdpBC,order=c(1,2,0))
usgdp.arima4<-Arima(usgdpBC,order=c(2,2,0))
accuracy(usgdp.arima)
## ME RMSE MAE MPE MAPE
## Training set 0.0006791305 0.1859861 0.1417489 -0.004468837 0.264737
## MASE ACF1
## Training set 0.1789208 0.009570738
accuracy(usgdp.arima2)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.03707771 0.1982065 0.1529715 0.06644196 0.2849382 0.1930864
## ACF1
## Training set 0.0009082095
accuracy(usgdp.arima3)
## ME RMSE MAE MPE MAPE
## Training set 0.001944469 0.2089466 0.1578764 0.004392134 0.2960049
## MASE ACF1
## Training set 0.1992776 -0.05986977
accuracy(usgdp.arima4)
## ME RMSE MAE MPE MAPE
## Training set 0.002157232 0.2069215 0.1589324 0.004868899 0.2978158
## MASE ACF1
## Training set 0.2006105 -0.02328162
#d.
tsdisplay(residuals(usgdp.arima))
#e.
fcusgdp<-forecast(usgdp.arima,h = 12)
plot(fcusgdp)
##The forecasts seem reasonable.
#f.
usgdp.ets<-ets(usgdpBC)
accuracy(usgdp.arima)
## ME RMSE MAE MPE MAPE
## Training set 0.0006791305 0.1859861 0.1417489 -0.004468837 0.264737
## MASE ACF1
## Training set 0.1789208 0.009570738
accuracy(usgdp.ets)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.03597887 0.1984366 0.1536758 0.06463645 0.2867675 0.1939754
## ACF1
## Training set -0.01393144
##ARIMA is better than ETS.
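# Sketch (added): training RMSE favours the model that fits the training data
# best, so a fairer comparison would use time series cross-validation (this can
# be slow, since a model is re-fitted at every forecast origin).
farima<-function(x, h){forecast(auto.arima(x), h = h)}
fets<-function(x, h){forecast(ets(x), h = h)}
e_arima<-tsCV(usgdpBC, farima, h = 1)
e_ets<-tsCV(usgdpBC, fets, h = 1)
sqrt(mean(e_arima^2, na.rm = TRUE))
sqrt(mean(e_ets^2, na.rm = TRUE))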
#a.
tsdisplay(austourists)
#Increasing trend, seasonality.
#b.
##The ACF shows high autocorrelation at lags 4, 8, 12 and 16.
#c.
##There is a spike at lag 4 in the PACF plot.
#d.
ggtsdisplay(diff(austourists, lag = 4))
ggtsdisplay(diff(diff(austourists, lag = 4)))
## ARIMA(1,1,0), ARIMA(1,0,0)[4]
#e.
austo.arima<-auto.arima(austourists)
austo.arima
## Series: austourists
## ARIMA(1,0,0)(1,1,0)[4] with drift
##
## Coefficients:
## ar1 sar1 drift
## 0.4705 -0.5305 0.5489
## s.e. 0.1154 0.1122 0.0864
##
## sigma^2 estimated as 5.15: log likelihood=-142.48
## AIC=292.97 AICc=293.65 BIC=301.6
##YES.
#f.
##NO IDEA.
#a.
tsdisplay(usmelec)
usme.MA<-ma(usmelec, order=12)
plot(usme.MA)
##Total net generation increased at first but stopped increasing around 2008.
#b.
z1 <- BoxCox.lambda(usmelec)
z1
## [1] -0.5738331
usme.BoxCox<-BoxCox(usmelec,z1)
##Yes, a transformation is needed.
#c.
tsdisplay(usme.BoxCox)
usme.dif1<-diff(usme.BoxCox)
tsdisplay(usme.dif1)
##There is autocorrelation every 12 months.
length(usmelec)
## [1] 486
usme.dif2<-usme.dif1[13:485]-usme.dif1[1:473] # usme.dif1 has only 485 values after the first difference
tsdisplay(usme.dif2)
#d.
usme.arima<-auto.arima(usme.BoxCox)
usme.arima
## Series: usme.BoxCox
## ARIMA(1,1,3)(2,1,1)[12]
##
## Coefficients:
## ar1 ma1 ma2 ma3 sar1 sar2 sma1
## 0.3952 -0.8194 -0.0468 0.0414 0.0403 -0.0934 -0.8462
## s.e. 0.3716 0.3734 0.1707 0.1079 0.0560 0.0533 0.0343
##
## sigma^2 estimated as 1.278e-06: log likelihood=2547.75
## AIC=-5079.5 AICc=-5079.19 BIC=-5046.23
usme.arima1<-Arima(usme.BoxCox,order=c(1,0,1), seasonal=c(0,1,0))
usme.arima2<-Arima(usme.BoxCox,order=c(1,0,1), seasonal=c(0,1,1))
usme.arima3<-auto.arima(usme.BoxCox)
usme.arima4<-auto.arima(usmelec)
summary(usme.arima1)
## Series: usme.BoxCox
## ARIMA(1,0,1)(0,1,0)[12]
##
## Coefficients:
## ar1 ma1
## 0.8941 -0.4771
## s.e. 0.0317 0.0708
##
## sigma^2 estimated as 2.049e-06: log likelihood=2440.39
## AIC=-4874.78 AICc=-4874.73 BIC=-4862.29
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 0.0002133574 0.001410751 0.001117608 0.01282924 0.06701427
## MASE ACF1
## Training set 0.7450607 0.06935111
summary(usme.arima2)
## Series: usme.BoxCox
## ARIMA(1,0,1)(0,1,1)[12]
##
## Coefficients:
## ar1 ma1 sma1
## 0.9966 -0.5264 -0.8476
## s.e. 0.0035 0.0621 0.0283
##
## sigma^2 estimated as 1.356e-06: log likelihood=2535.5
## AIC=-5063.01 AICc=-5062.92 BIC=-5046.36
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 6.999609e-05 0.001146516 0.0008990479 0.004230004 0.05393229
## MASE ACF1
## Training set 0.5993564 0.1591829
summary(usme.arima3)
## Series: usme.BoxCox
## ARIMA(1,1,3)(2,1,1)[12]
##
## Coefficients:
## ar1 ma1 ma2 ma3 sar1 sar2 sma1
## 0.3952 -0.8194 -0.0468 0.0414 0.0403 -0.0934 -0.8462
## s.e. 0.3716 0.3734 0.1707 0.1079 0.0560 0.0533 0.0343
##
## sigma^2 estimated as 1.278e-06: log likelihood=2547.75
## AIC=-5079.5 AICc=-5079.19 BIC=-5046.23
##
## Training set error measures:
## ME RMSE MAE MPE
## Training set -5.884034e-05 0.001107007 0.0008378929 -0.003530687
## MAPE MASE ACF1
## Training set 0.05024385 0.558587 0.0006701709
##The auto.arima model's AIC (-5079.5) is slightly lower (better) than the second model's (-5063.01), but the second model is much simpler.
tsdisplay(residuals(usme.arima2))
##The residuals look like white noise.
#f.
#a.
autoplot(mcopper)
# The data are monthly, but there is no clear seasonality in them.
autoplot(BoxCox(mcopper, BoxCox.lambda(mcopper)))
# The Box-Cox transformation appears to make the variation in the data more even over time, so I will use it.
lambda_mcopper <- BoxCox.lambda(mcopper)
#b.
mcopper_autoarima <- auto.arima(
mcopper,
lambda = lambda_mcopper
)
mcopper_autoarima
## Series: mcopper
## ARIMA(0,1,1)
## Box Cox transformation: lambda= 0.1919047
##
## Coefficients:
## ma1
## 0.3720
## s.e. 0.0388
##
## sigma^2 estimated as 0.04997: log likelihood=45.05
## AIC=-86.1 AICc=-86.08 BIC=-77.43
# auto.arima chose an ARIMA(0,1,1) model with a Box-Cox transformation; the AICc was -86.08.
#c.
ndiffs(mcopper)
## [1] 1
nsdiffs(mcopper)
## [1] 0
ggtsdisplay(diff(mcopper))
mcopper_arima.1.1.0 <- Arima(
mcopper, order = c(1, 1, 0), lambda = lambda_mcopper
)
mcopper_arima.1.1.0
## Series: mcopper
## ARIMA(1,1,0)
## Box Cox transformation: lambda= 0.1919047
##
## Coefficients:
## ar1
## 0.3231
## s.e. 0.0399
##
## sigma^2 estimated as 0.05091: log likelihood=39.83
## AIC=-75.66 AICc=-75.64 BIC=-66.99
## AICc was -75.64.
mcopper_arima.5.1.0 <- Arima(
mcopper, order = c(5, 1, 0), lambda = lambda_mcopper
)
mcopper_arima.5.1.0
## Series: mcopper
## ARIMA(5,1,0)
## Box Cox transformation: lambda= 0.1919047
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5
## 0.3706 -0.1475 0.0609 -0.0038 0.0134
## s.e. 0.0421 0.0450 0.0454 0.0450 0.0422
##
## sigma^2 estimated as 0.05028: log likelihood=45.32
## AIC=-78.63 AICc=-78.48 BIC=-52.63
## AICc was -78.48.
#d.
checkresiduals(mcopper_autoarima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 22.913, df = 23, p-value = 0.4659
##
## Model df: 1. Total lags used: 24
##The residuals look like white noise.
#e.
fc_mcopper_autoarima <- forecast(
mcopper_autoarima
)
autoplot(fc_mcopper_autoarima)
## Bad forecasts
fc_mcopper_arima.1.1.0 <- forecast(
mcopper_arima.1.1.0
)
autoplot(fc_mcopper_arima.1.1.0)
## Almost the same result.
fc_mcopper_arima.5.1.0 <- forecast(
mcopper_arima.5.1.0
)
autoplot(fc_mcopper_arima.5.1.0)
# Almost the same result.
#f.
fc_mcopper_ets <- forecast(
ets(mcopper)
)
autoplot(fc_mcopper_ets)
##The ETS forecasts look much more reasonable.
tsdisplay(hsales)
str(hsales)
## Time-Series [1:275] from 1973 to 1996: 55 60 68 63 65 61 54 52 46 42 ...
#a.
z3 <- BoxCox.lambda(hsales)
z3
## [1] 0.1454608
hsales.BoxCox<-BoxCox(hsales,z3)
#b.
hsales.sdif<-hsales.BoxCox[13:275]-hsales.BoxCox[1:263] # seasonal difference of the transformed series (275 monthly values)
tsdisplay(hsales.sdif)
hsales.dif2<-diff(hsales.sdif)
tsdisplay(hsales.dif2)
## One seasonal difference and one first difference would be enough.
#c.
hsales.arima1<-Arima(hsales.BoxCox,order=c(1,0,0), seasonal=c(1,0,0))
hsales.arima2<-Arima(hsales.BoxCox,order=c(1,0,0), seasonal=c(1,0,1))
hsales.arima3<-auto.arima(hsales.BoxCox)
hsales.arima1
## Series: hsales.BoxCox
## ARIMA(1,0,0)(1,0,0)[12] with non-zero mean
##
## Coefficients:
## ar1 sar1 mean
## 0.8508 0.5562 5.3282
## s.e. 0.0310 0.0499 0.1488
##
## sigma^2 estimated as 0.03102: log likelihood=85.92
## AIC=-163.84 AICc=-163.69 BIC=-149.37
hsales.arima2
## Series: hsales.BoxCox
## ARIMA(1,0,0)(1,0,1)[12] with non-zero mean
##
## Coefficients:
## ar1 sar1 sma1 mean
## 0.9091 0.9999 -0.9833 5.3204
## s.e. 0.0244 0.0004 0.0257 0.5000
##
## sigma^2 estimated as 0.0218: log likelihood=119.35
## AIC=-228.69 AICc=-228.47 BIC=-210.61
hsales.arima3
## Series: hsales.BoxCox
## ARIMA(1,0,0)(1,1,0)[12] with drift
##
## Coefficients:
## ar1 sar1 drift
## 0.9038 -0.4603 -0.0012
## s.e. 0.0274 0.0558 0.0064
##
## sigma^2 estimated as 0.03176: log likelihood=79.8
## AIC=-151.6 AICc=-151.44 BIC=-137.31
##ARIMA(1,0,0)(1,0,1)[12] has the best (lowest) AIC.
#d.
tsdisplay(residuals(hsales.arima3))
##The ACF and PACF suggest the residuals are white noise.
#e.
fcast.hsales.arima<-forecast(hsales.arima3, h=24)
plot(fcast.hsales.arima)
#f.
hsales.ets<-ets(hsales.BoxCox, model="ZZZ")
fcast.hsales.ets<-forecast(hsales.ets, h=24)
plot(fcast.hsales.ets)
accuracy(fcast.hsales.arima)
## ME RMSE MAE MPE MAPE
## Training set 0.003189591 0.1732846 0.1380209 0.002487638 2.632293
## MASE ACF1
## Training set 0.4684505 -0.05134115
accuracy(fcast.hsales.ets)
## ME RMSE MAE MPE MAPE
## Training set -0.0003033384 0.1470773 0.1170746 -0.05679777 2.23873
## MASE ACF1
## Training set 0.3973578 0.09236609
##ETS is a little bit better than ARIMA.
hsales.dec<-stl(hsales.BoxCox, t.window=13, s.window="periodic")
hsales.seasadj<-seasadj(hsales.dec)
hsales.STL <- stlf(hsales.seasadj, method="arima")
fcast.hsales.STL<-forecast(hsales.STL, h=24)
plot(fcast.hsales.STL)
accuracy(fcast.hsales.STL)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.002313283 0.1332736 0.1069011 -0.110414 2.034485 0.3628282
## ACF1
## Training set -0.05139429
##STLF is better than ETS and ARIMA
#a.
fc_retail_autoarima <- forecast(
auto.arima(retail.ts),
h = 36
)
autoplot(fc_retail_autoarima)
# ARIMA(1, 0, 2)(0, 1, 1)[12] with drift model was chosen.
#b.
fc_retail_snaive <- snaive(retail.ts, h = 36)
autoplot(fc_retail_snaive)
fc_retail_ets <- forecast(
ets(retail.ts, lambda = BoxCox.lambda(retail.ts)),
h = 36
)
autoplot(fc_retail_ets)
#c.
#a.
autoplot(sheep)
##Decreasing trend, no seasonality
#b.
##ARIMA(3,1,0)
#c.
ggtsdisplay(diff(sheep))
##The ACF shows decreasing autocorrelation.
#d.
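The hand calculation below uses the ARIMA(3,1,0) model written out in difference form, with the coefficients given in the exercise:
$$y_t = y_{t-1} + 0.42(y_{t-1} - y_{t-2}) - 0.20(y_{t-2} - y_{t-3}) - 0.30(y_{t-3} - y_{t-4}) + \varepsilon_t$$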
sheep.1940 = 1797 + 0.42*(1797 - 1791) -0.20*(1791 - 1627) - 0.30*(1627 - 1665)
sheep.1941 = sheep.1940 + 0.42*(sheep.1940 - 1797) -0.20*(1797 - 1791) - 0.30*(1791 - 1627)
sheep.1942 = sheep.1941 + 0.42*(sheep.1941 - sheep.1940) -0.20*(sheep.1940 - 1797) - 0.30*(1797 - 1791)
c(sheep.1940, sheep.1941, sheep.1942)
## [1] 1778.120 1719.790 1697.268
#e.
fc_sheep_arima.3.1.0 <- forecast(
Arima(sheep, order = c(3, 1, 0)),
h = 3
)
fc_sheep_arima.3.1.0$mean
## Time Series:
## Start = 1940
## End = 1942
## Frequency = 1
## [1] 1777.996 1718.869 1695.985
ar1 <- fc_sheep_arima.3.1.0$model$coef[1]
ar2 <- fc_sheep_arima.3.1.0$model$coef[2]
ar3 <- fc_sheep_arima.3.1.0$model$coef[3]
sheep.1940.new = 1797 + ar1*(1797 - 1791) + ar2*(1791 - 1627) + ar3*(1627 - 1665)
sheep.1941.new = sheep.1940.new + ar1*(sheep.1940.new - 1797) + ar2*(1797 - 1791) + ar3*(1791 - 1627)
sheep.1942.new = sheep.1941.new + ar1*(sheep.1941.new - sheep.1940.new) + ar2*(sheep.1940.new - 1797) + ar3*(1797 - 1791)
c(sheep.1940.new, sheep.1941.new, sheep.1942.new)
## ar1 ar1 ar1
## 1777.996 1718.869 1695.985
#a.