This exercise uses data set LakeHuron giving the level of Lake Huron from 1875–1972.
library(forecast)
huron <- LakeHuron
# Trend for the whole time period
trend <- time(huron)
# Trend after the knot at 1920
trend2 <- pmax(trend - 1920, 0)
# Fit a piecewise linear trend model with ARIMA errors
fit <- auto.arima(huron, xreg = cbind(trend, trend2))
# Create values of the regressors trend and trend2 for the next 30 years
trend.fc <- max(time(huron)) + seq(30)
trend2.fc <- trend.fc - 1920
# Produce forecasts
fc <- forecast(fit, xreg = cbind(trend.fc, trend2.fc))
## Warning in forecast.forecast_ARIMA(fit, xreg = cbind(trend.fc, trend2.fc)): xreg
## contains different column names from the xreg used in training. Please check
## that the regressors are in the same order.
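The warning arises because the forecast regressors (trend.fc, trend2.fc) are named differently from the training regressors (trend, trend2); the order is correct here, so the forecasts are unaffected. One way to avoid the warning is to name the forecast columns to match, as in this small sketch:

# Name the forecast regressors to match the training xreg,
# which avoids the column-name warning.
fc <- forecast(fit, xreg = cbind(trend = trend.fc, trend2 = trend2.fc))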
# Plot the forecasts
autoplot(fc) +
  autolayer(huron - residuals(fit, type = 'regression'), series = "Fitted trend")

Note that the argument residuals(fit, type = 'regression') refers to the regression errors, \(\eta_t\), and not the ARIMA errors. By subtracting the regression errors from the data huron, what is left is the piecewise linear trend.
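For comparison, the same piecewise model can be fitted in the fable framework. This is only a sketch, assuming the fpp3 package is loaded and that trend(knots = 1920) places the knot at 1920:

library(fpp3)
# Convert the ts object to a tsibble; the lake level is stored in `value`
huron_tsbl <- as_tsibble(LakeHuron)
# Piecewise linear trend with a knot at 1920, plus automatically selected ARIMA errors
fit_fable <- huron_tsbl %>%
  model(ARIMA(value ~ trend(knots = 1920)))
# Forecast 30 years ahead; the trend regressors are extended automatically
fit_fable %>%
  forecast(h = 30) %>%
  autoplot(huron_tsbl)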
Repeat Exercise 4 from Section 7.10, but this time adding in ARIMA errors to address the autocorrelations in the residuals.
data.lm <- lm(Sales ~ Month, data = souvenirs)
summary(data.lm)
##
## Call:
## lm(formula = Sales ~ Month, data = souvenirs)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15097 -7053 -1500 2391 75358
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -74313.980 14578.023 -5.098 2.17e-06 ***
## Month 11.862 1.942 6.109 3.21e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13130 on 82 degrees of freedom
## Multiple R-squared: 0.3128, Adjusted R-squared: 0.3044
## F-statistic: 37.32 on 1 and 82 DF, p-value: 3.207e-08
data <- auto.arima(souvenirs)
summary(data)
## Series: souvenirs
## ARIMA(1,1,1)(0,1,1)[12]
##
## Coefficients:
## ar1 ma1 sma1
## 0.2401 -0.9013 0.7499
## s.e. 0.1427 0.0709 0.1790
##
## sigma^2 estimated as 16146440: log likelihood=-693.69
## AIC=1395.38 AICc=1395.98 BIC=1404.43
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 328.301 3615.374 2171.002 -2.481166 15.97302 0.4905797 -0.02521172
autoplot(data)

In the ARIMA model, the estimated coefficients all have magnitude less than 1, which makes them easier to interpret. In the linear model, by contrast, the estimates are very large and the fitted line departs much further from the data.
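Exercise 4 in Section 7.10 modelled log(Sales) with a trend and a surfing-festival dummy; a sketch of refitting that regression with ARIMA errors in the fable framework (assuming the fpp3 package is loaded, and that the festival dummy marks every March from 1988 onwards) is:

# Sketch: the Section 7.10 regression refitted with ARIMA errors
fit_dyn <- souvenirs %>%
  mutate(festival = month(Month) == 3 & year(Month) != 1987) %>%
  model(ARIMA(log(Sales) ~ trend() + season() + festival))
report(fit_dyn)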
forecast(data, h = 10)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1994 15502.41 10328.76 20676.06 7589.993 23414.82
## Feb 1994 15313.95 9852.74 20775.15 6961.752 23666.14
## Mar 1994 27533.10 21993.23 33072.97 19060.599 36005.60
## Apr 1994 25361.02 19772.78 30949.27 16814.546 33907.50
## May 1994 25235.21 19604.96 30865.46 16624.491 33845.93
## Jun 1994 26880.98 21210.39 32551.56 18208.568 35553.38
## Jul 1994 35811.60 30101.29 41521.91 27078.430 44544.76
## Aug 1994 37918.78 32169.09 43668.46 29125.389 46712.17
## Sep 1994 35262.00 29473.22 41050.77 26408.830 44115.16
## Oct 1994 36029.60 30202.03 41857.18 27117.093 44942.11
autoplot(forecast(data, h = 2))

The forecasts level off very quickly, unlike the forecasts from the regression model without ARIMA errors. This shows that an ARIMA model will not always give us the best option.
tsdisplay(residuals(data), lag.max = 12, main = '(1,1,1)(0,1,1) Model Residuals')

Repeat the daily electricity example, but instead of using a quadratic function of temperature, use a piecewise linear function with the "knot" around 25 degrees Celsius (use predictors Temperature & Temp2). How can you optimise the choice of knot?
The data can be created as follows.
vic_elec_daily <- vic_elec %>%
  filter(year(Time) == 2014) %>%
  index_by(Date = date(Time)) %>%
  summarise(
    Demand = sum(Demand) / 1e3,
    Temperature = max(Temperature),
    Holiday = any(Holiday)) %>%
  mutate(
    Temp2 = I(pmax(Temperature - 25, 0)),
    Day_Type = case_when(
      Holiday ~ "Holiday",
      wday(Date) %in% 2:6 ~ "Weekday",
      TRUE ~ "Weekend"))

First, we will leave the knot at 25 and find the best ARIMA model. This will take a while, but we only have to do it once.
vic_elec_daily %>%
  model(
    fit = ARIMA(Demand ~ Temperature + Temp2 + (Day_Type == "Weekday"),
                approximation = FALSE, stepwise = FALSE,
                order_constraint = p + q + P + Q <= 10)
  ) %>%
  report()
## Series: Demand
## Model: LM w/ ARIMA(1,1,1)(2,0,0)[7] errors
##
## Coefficients:
## ar1 ma1 sar1 sar2 Temperature Temp2
## 0.7877 -0.9752 0.1990 0.2922 -0.7100 4.4206
## s.e. 0.0464 0.0210 0.0522 0.0563 0.1656 0.2573
## Day_Type == "Weekday"TRUE
## 31.4670
## s.e. 1.3287
##
## sigma^2 estimated as 61.09: log likelihood=-1262.42
## AIC=2540.85 AICc=2541.25 BIC=2572.02
Now we will fix the ARIMA orders at (5,1,2)(2,0,0)[7] and modify the knot until the AICc is optimised.
elec_model <- vic_elec_daily %>%
  mutate(Temp2 = I(pmax(Temperature - 28.4, 0))) %>%
  model(
    fit = ARIMA(Demand ~ Temperature + Temp2 + (Day_Type == "Weekday") +
                  pdq(5,1,2) + PDQ(2,0,0),
                approximation = FALSE, order_constraint = p + q + P + Q <= 10)
  )
glance(elec_model) %>% pull(AICc)
## [1] 2528.041
The optimal knot (to 1 decimal place) is 28.4 degrees Celsius.
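The knot search itself is not shown above; a minimal sketch (assuming the same vic_elec_daily data and a grid of candidate knots) is:

# Sketch: grid search over candidate knots, refitting the fixed
# ARIMA(5,1,2)(2,0,0)[7] specification and recording each AICc.
knots <- seq(20, 35, by = 0.1)
aicc <- purrr::map_dbl(knots, function(k) {
  vic_elec_daily %>%
    mutate(Temp2 = I(pmax(Temperature - k, 0))) %>%
    model(ARIMA(Demand ~ Temperature + Temp2 + (Day_Type == "Weekday") +
                  pdq(5,1,2) + PDQ(2,0,0))) %>%
    glance() %>%
    pull(AICc)
})
knots[which.min(aicc)]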
report(elec_model)
## Series: Demand
## Model: LM w/ ARIMA(5,1,2)(2,0,0)[7] errors
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 sar1 sar2
## 0.3078 0.3770 0.0517 -0.1440 0.1526 -0.5134 -0.4502 0.1697 0.3211
## s.e. 0.2106 0.1558 0.0631 0.0602 0.0636 0.2067 0.1945 0.0596 0.0564
## Temperature Temp2 Day_Type == "Weekday"TRUE
## 0.0782 5.3544 31.0524
## s.e. 0.1345 0.3389 1.2819
##
## sigma^2 estimated as 57.99: log likelihood=-1250.5
## AIC=2527 AICc=2528.04 BIC=2577.66
augment(elec_model) %>%
  left_join(vic_elec_daily) %>%
  ggplot(aes(x = Temperature)) +
  geom_point(aes(y = Demand)) +
  geom_point(aes(y = .fitted), col = "red")
## Joining, by = c("Date", "Demand")
augment(elec_model) %>%
  left_join(vic_elec_daily) %>%
  ggplot(aes(x = Demand)) +
  geom_point(aes(y = .fitted)) +
  geom_abline(aes(intercept = 0, slope = 1))
## Joining, by = c("Date", "Demand")
gg_tsresiduals(elec_model)
augment(elec_model) %>%
  features(.resid, ljung_box, dof = 12, lag = 21)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 fit 15.1 0.0881
The model passes the residual tests and seems to give reasonable forecasts. The residuals show some heteroskedasticity, but otherwise look OK.
vic_next_day <- new_data(vic_elec_daily, 1) %>%
  mutate(
    Temperature = 26,
    Temp2 = I(pmax(Temperature - 28.4, 0)),
    Day_Type = "Holiday")
forecast(elec_model, vic_next_day)
## # A fable: 1 x 7 [1D]
## # Key: .model [1]
## .model Date Demand .mean Temperature Temp2 Day_Type
## <chr> <date> <dist> <dbl> <dbl> <I<dbl>> <chr>
## 1 fit 2015-01-01 N(160, 58) 160. 26 0 Holiday
vic_elec_future <- new_data(vic_elec_daily, 14) %>%
  mutate(
    Temperature = 26,
    Temp2 = I(pmax(Temperature - 28.4, 0)),
    Holiday = c(TRUE, rep(FALSE, 13)),
    Day_Type = case_when(
      Holiday ~ "Holiday",
      wday(Date) %in% 2:6 ~ "Weekday",
      TRUE ~ "Weekend"))
forecast(elec_model, vic_elec_future) %>%
  autoplot(vic_elec_daily) +
  ylab("Electricity demand (GW)")

This exercise concerns aus_accommodation: the total quarterly takings from accommodation and the room occupancy level for hotels, motels, and guest houses in Australia, between January 1998 and June 2016. Total quarterly takings are in millions of Australian dollars.
We fitted a harmonic regression model to part of the us_gasoline series in Exercise 6 in Section 7.10. We will now revisit this model, and extend it to include more data and ARMA errors.
Let's optimise using 2 break points and an unknown number of Fourier terms. Because the number of Fourier terms is an integer, we can't just use optim. Instead, we will loop over a large number of possible values for the breakpoints and Fourier terms. More than 2000 models are fitted here, but TSLM is relatively fast.
Note that the possible values of the knots must be restricted so that knot2 is always much larger than knot1. We have set them to be at least 2 years apart here.
us_gasoline %>% autoplot(Barrels)

get_cv <- function(K, knot1, knot2) {
  us_gasoline %>%
    model(TSLM(Barrels ~ fourier(K = K) +
                 trend(c(knot1, knot2)))) %>%
    glance() %>%
    pull(CV)
}
models <- expand.grid(
    K = seq(25),
    knot1 = yearweek(as.character(seq(1991, 2017, 2))),
    knot2 = yearweek(as.character(seq(1991, 2017, 2)))
  ) %>%
  filter(knot2 - knot1 > 104) %>%
  as_tibble()
models <- models %>%
  mutate(cv = purrr::pmap_dbl(models, get_cv)) %>%
  arrange(cv)
(best <- head(models, 1))
## # A tibble: 1 x 4
## K knot1 knot2 cv
## <int> <week> <week> <dbl>
## 1 6 2007 W01 2013 W01 0.0641
fit <- us_gasoline %>%
  model(
    TSLM(Barrels ~ fourier(K = best$K) +
           trend(c(best$knot1, best$knot2)))
  )

fit <- us_gasoline %>%
  model(
    ARIMA(Barrels ~ fourier(K = best$K) + trend(c(best$knot1, best$knot2)) +
            PDQ(0,0,0))
  )
fit %>% report()
## Series: Barrels
## Model: LM w/ ARIMA(1,0,1) errors
##
## Coefficients:
## ar1 ma1 fourier(K = best$K)C1_52 fourier(K = best$K)S1_52
## 0.9277 -0.8414 -0.1144 -0.2306
## s.e. 0.0256 0.0357 0.0133 0.0132
## fourier(K = best$K)C2_52 fourier(K = best$K)S2_52
## 0.0418 0.0309
## s.e. 0.0105 0.0105
## fourier(K = best$K)C3_52 fourier(K = best$K)S3_52
## 0.0836 0.0343
## s.e. 0.0097 0.0097
## fourier(K = best$K)C4_52 fourier(K = best$K)S4_52
## 0.0187 0.0399
## s.e. 0.0094 0.0094
## fourier(K = best$K)C5_52 fourier(K = best$K)S5_52
## -0.0315 0.0011
## s.e. 0.0092 0.0092
## fourier(K = best$K)C6_52 fourier(K = best$K)S6_52
## -0.0523 0.0001
## s.e. 0.0092 0.0092
## trend(c(best$knot1, best$knot2))trend
## 0.0028
## s.e. 0.0001
## trend(c(best$knot1, best$knot2))trend_831
## -0.0051
## s.e. 0.0002
## trend(c(best$knot1, best$knot2))trend_1144 intercept
## 0.0055 7.1065
## s.e. 0.0006 0.0352
##
## sigma^2 estimated as 0.06051: log likelihood=-13.38
## AIC=64.76 AICc=65.33 BIC=163.78
gg_tsresiduals(fit)
augment(fit) %>% features(.resid, ljung_box, dof = 18, lag = 24)
## # A tibble: 1 x 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 "ARIMA(Barrels ~ fourier(K = best$K) + trend(c(best$knot1, ~ 23.6 0.000623
The model looks pretty good, although there is some heteroskedasticity and the model fails the Ljung-Box test. Nevertheless, the residual autocorrelations are tiny, so they should have almost no effect on the forecasts.
fit %>%
  forecast(h = "1 year") %>%
  autoplot(us_gasoline)

Electricity consumption is often modelled as a function of temperature. Temperature is measured by daily heating degrees and cooling degrees. Heating degrees is \(18^\circ\)C minus the average daily temperature when the daily average is below \(18^\circ\)C; otherwise it is zero. This provides a measure of our need to heat ourselves as temperature falls. Cooling degrees measures our need to cool ourselves as the temperature rises. It is defined as the average daily temperature minus \(18^\circ\)C when the daily average is above \(18^\circ\)C; otherwise it is zero. Let \(y_t\) denote the monthly total of kilowatt-hours of electricity used, let \(x_{1,t}\) denote the monthly total of heating degrees, and let \(x_{2,t}\) denote the monthly total of cooling degrees.
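The heating- and cooling-degree definitions translate directly into code; for illustration, here is a sketch using a hypothetical vector avg_temp of daily average temperatures:

# Sketch: heating and cooling degrees from daily average temperatures
# (avg_temp is a hypothetical example vector)
avg_temp <- c(12, 16, 21, 25, 17)
heating_degrees <- pmax(18 - avg_temp, 0)  # 6 2 0 0 1
cooling_degrees <- pmax(avg_temp - 18, 0)  # 0 0 3 7 0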
An analyst fits the following model to a set of such data: \[y^*_t = \beta_1 x^*_{1,t} + \beta_2 x^*_{2,t} + \eta_t,\] where \[(1-B)(1-B^{12})\eta_t = \frac{1+\theta_1 B}{1-\phi_{12}B^{12}-\phi_{24}B^{24}}\epsilon_t\]
and \(y^*_t=\log(y_t)\), \(x^*_{1,t}=\sqrt{x_{1,t}}\) and \(x^*_{2,t}=\sqrt{x_{2,t}}\).
What sort of ARIMA model is identified for \(\eta_t\)?
An ARIMA(0,1,1)(2,1,0)\(_{12}\) model.
The estimated coefficients are given in the exercise (the table is not reproduced here). Explain what the estimates of \(\beta_1\) and \(\beta_2\) tell us about electricity consumption.
\(b_1\) is the increase in \(y^*_t\) when \(x^*_{1,t}\) increases by 1. This is a little hard to interpret directly because of the transformations, but it is clear that monthly total electricity usage goes up when monthly heating degrees go up. Similarly for \(b_2\): monthly total electricity usage goes up when monthly cooling degrees go up.
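To see why the direct interpretation is awkward, undo the transformations (a sketch of the reasoning, ignoring the error term):
\[y_t = \exp\left(\beta_1\sqrt{x_{1,t}} + \beta_2\sqrt{x_{2,t}}\right),\]
so a one-unit increase in \(\sqrt{x_{1,t}}\) multiplies expected electricity usage by \(e^{\beta_1}\), and the marginal effect of an extra heating degree diminishes as \(x_{1,t}\) grows.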
Write the equation in a form more suitable for forecasting. Describe how this model could be used to forecast electricity demand for the next 12 months.
\[(1-B)(1-B^{12})y^*_t = b_1(1-B)(1-B^{12})x^*_{1,t} + b_2(1-B)(1-B^{12})x^*_{2,t} + (1-B)(1-B^{12})\eta_t.\]
So
\[(y^*_t - y^*_{t-1} - y^*_{t-12} + y^*_{t-13}) = b_1(x^*_{1,t} - x^*_{1,t-1} - x^*_{1,t-12} + x^*_{1,t-13}) + b_2(x^*_{2,t} - x^*_{2,t-1} - x^*_{2,t-12} + x^*_{2,t-13}) + \frac{1+\theta_1 B}{1-\phi_{12}B^{12}-\phi_{24}B^{24}}\epsilon_t.\]
Multiplying by the AR polynomial gives
\((y^*_t - y^*_{t-1} - y^*_{t-12} + y^*_{t-13}) - \phi_{12}(y^*_{t-12} - y^*_{t-13} - y^*_{t-24} + y^*_{t-25}) - \phi_{24}(y^*_{t-24} - y^*_{t-25} - y^*_{t-36} + y^*_{t-37}) =\) \(b_1(x^*_{1,t} - x^*_{1,t-1} - x^*_{1,t-12} + x^*_{1,t-13}) - \phi_{12}b_1(x^*_{1,t-12} - x^*_{1,t-13} - x^*_{1,t-24} + x^*_{1,t-25}) -\) \(\phi_{24}b_1(x^*_{1,t-24} - x^*_{1,t-25} - x^*_{1,t-36} + x^*_{1,t-37}) + b_2(x^*_{2,t} - x^*_{2,t-1} - x^*_{2,t-12} + x^*_{2,t-13}) -\) \(\phi_{12}b_2(x^*_{2,t-12} - x^*_{2,t-13} - x^*_{2,t-24} + x^*_{2,t-25}) - \phi_{24}b_2(x^*_{2,t-24} - x^*_{2,t-25} - x^*_{2,t-36} + x^*_{2,t-37}) + \epsilon_t + \theta_1\epsilon_{t-1}.\)
Finally, we move all but \(y^*_t\) to the right hand side:
\(y^*_t = y^*_{t-1} + y^*_{t-12} - y^*_{t-13} + \phi_{12}(y^*_{t-12} - y^*_{t-13} - y^*_{t-24} + y^*_{t-25}) + \phi_{24}(y^*_{t-24} - y^*_{t-25} - y^*_{t-36} + y^*_{t-37}) +\) \(b_1(x^*_{1,t} - x^*_{1,t-1} - x^*_{1,t-12} + x^*_{1,t-13}) - \phi_{12}b_1(x^*_{1,t-12} - x^*_{1,t-13} - x^*_{1,t-24} + x^*_{1,t-25}) -\) \(\phi_{24}b_1(x^*_{1,t-24} - x^*_{1,t-25} - x^*_{1,t-36} + x^*_{1,t-37}) + b_2(x^*_{2,t} - x^*_{2,t-1} - x^*_{2,t-12} + x^*_{2,t-13}) -\) \(\phi_{12}b_2(x^*_{2,t-12} - x^*_{2,t-13} - x^*_{2,t-24} + x^*_{2,t-25}) - \phi_{24}b_2(x^*_{2,t-24} - x^*_{2,t-25} - x^*_{2,t-36} + x^*_{2,t-37}) + \epsilon_t + \theta_1\epsilon_{t-1}.\)
For \(t=T+1\), we use the above equation to find a point forecast of \(y^*_{T+1}\), setting \(\epsilon_{T+1}=0\) and \(\epsilon_T\) to the last residual. The actual \(y^*_t\) values are all known, as are all the \(x^*_{1,t}\) and \(x^*_{2,t}\) values up to time \(t=T\). For \(x^*_{1,T+1}\) and \(x^*_{2,T+1}\), we could use a forecast (for example, a seasonal naive forecast). For \(t=T+2,\dots,T+12\), we do something similar, but both \(\epsilon\) values are set to 0 and \(y^*_{T+k}\) (for \(k\ge 1\)) is replaced by the forecasts just calculated.
Explain why the \(\eta_t\) term should be modelled with an ARIMA model rather than modelling the data using a standard regression package. In your discussion, comment on the properties of the estimates, the validity of the standard regression results, and the importance of the \(\eta_t\) model in producing forecasts.
For the retail time series considered in earlier chapters:
library(lubridate)
library(fpp3)
set.seed(12345678)
myseries <- aus_retail %>%
  filter(
    `Series ID` == sample(aus_retail$`Series ID`, 1),
    Month < yearmonth("2018 Jan")
  )
myseries %>% features(Turnover, guerrero)
## # A tibble: 1 x 3
## State Industry lambda_guerrero
## <chr> <chr> <dbl>
## 1 Northern Territo~ Clothing, footwear and personal accessory r~ -0.0337
myseries %>% autoplot(log(Turnover))

fit <- myseries %>%
  model(
    `K=1` = ARIMA(log(Turnover) ~ trend() + fourier(K = 1) +
                    pdq(0:2, 0, 0:2) + PDQ(0:1, 0, 0:1)),
    `K=2` = ARIMA(log(Turnover) ~ trend() + fourier(K = 2) +
                    pdq(0:2, 0, 0:2) + PDQ(0:1, 0, 0:1)),
    `K=3` = ARIMA(log(Turnover) ~ trend() + fourier(K = 3) +
                    pdq(0:2, 0, 0:2) + PDQ(0:1, 0, 0:1)),
    `K=4` = ARIMA(log(Turnover) ~ trend() + fourier(K = 4) +
                    pdq(0:2, 0, 0:2) + PDQ(0:1, 0, 0:1)),
    `K=5` = ARIMA(log(Turnover) ~ trend() + fourier(K = 5) +
                    pdq(0:2, 0, 0:2) + PDQ(0:1, 0, 0:1)),
    `K=6` = ARIMA(log(Turnover) ~ trend() + fourier(K = 6) +
                    pdq(0:2, 0, 0:2) + PDQ(0:1, 0, 0:1))
  )
glance(fit)
## # A tibble: 6 x 10
## State Industry .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 Northe~ Clothing, ~ K=1 0.00664 383. -748. -748. -713. <cpl [1~ <cpl [2~
## 2 Northe~ Clothing, ~ K=2 0.00652 389. -756. -755. -713. <cpl [1~ <cpl [2~
## 3 Northe~ Clothing, ~ K=3 0.00626 400. -774. -773. -723. <cpl [1~ <cpl [2~
## 4 Northe~ Clothing, ~ K=4 0.00596 411. -792. -791. -734. <cpl [1~ <cpl [2~
## 5 Northe~ Clothing, ~ K=5 0.00480 453. -873. -872. -811. <cpl [1~ <cpl [0~
## 6 Northe~ Clothing, ~ K=6 0.00437 470. -906. -905. -841. <cpl [1~ <cpl [1~
fit <- transmute(fit, best = `K=6`)
report(fit)
## Series: Turnover
## Model: LM w/ ARIMA(1,0,1)(1,0,0)[12] errors
## Transformation: log(Turnover)
##
## Coefficients:
## ar1 ma1 sar1 trend() fourier(K = 6)C1_12
## 0.9632 -0.3755 0.1761 0.0041 -0.0809
## s.e. 0.0165 0.0502 0.0542 0.0006 0.0080
## fourier(K = 6)S1_12 fourier(K = 6)C2_12 fourier(K = 6)S2_12
## -0.1258 0.0381 -0.0882
## s.e. 0.0080 0.0052 0.0052
## fourier(K = 6)C3_12 fourier(K = 6)S3_12 fourier(K = 6)C4_12
## -0.0206 -0.0815 -0.0294
## s.e. 0.0045 0.0045 0.0042
## fourier(K = 6)S4_12 fourier(K = 6)C5_12 fourier(K = 6)S5_12
## -0.0538 -0.0554 -0.0540
## s.e. 0.0042 0.0041 0.0041
## fourier(K = 6)C6_12 intercept
## -0.0230 1.3317
## s.e. 0.0029 0.1231
##
## sigma^2 estimated as 0.004368: log likelihood=470.23
## AIC=-906.46 AICc=-904.65 BIC=-840.54
The chosen model has a linear trend (which becomes exponential after back-transforming from the log scale) and Fourier terms with 6 harmonics. The error model is ARIMA(1,0,1)(1,0,0)[12].
gg_tsresiduals(fit)

The residuals look well behaved.
fit <- myseries %>%
  model(
    dynamic = ARIMA(log(Turnover) ~ trend() + fourier(K = 6) +
                      pdq(0:2, 0, 0:2) + PDQ(0:1, 0, 0:1)),
    arima = ARIMA(log(Turnover)),
    ets = ETS(Turnover)
  )
## Warning in sqrt(diag(best$var.coef)): NaNs produced
fit %>%
  forecast() %>%
  autoplot(filter(myseries, year(Month) > 2010), level = 80, alpha = 0.5)
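Since myseries was truncated at December 2017, the three fitted models can also be compared on held-out data; a sketch (assuming the full aus_retail series extends beyond the training period):

# Sketch: compare forecast accuracy on the held-out observations
fit %>%
  forecast(h = "2 years") %>%
  accuracy(aus_retail)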