library(fpp3)
# install and load any package necessary
pig <- aus_livestock %>%
filter(Animal=="Pigs") %>%
filter(State=="Victoria")
view(pig)
autoplot(pig)
## Plot variable not specified, automatically selected `.vars = Count`
pigcurrent <- pig %>%
slice(451:558)
pig %>%
model(
classical_decomposition(Count, type = "additive")
) %>%
components() %>%
autoplot()
## Warning: Removed 6 row(s) containing missing values (geom_path).
fit <- pig %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
fc <- fit %>%
forecast(h=4)
fc
## # A fable: 4 x 6 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean
## <fct> <fct> <chr> <mth> <dist> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95187, 1.1e+08) 95187.
fc %>%
autoplot(pigcurrent) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit))
#calculating prediction interval for h=4:
resid <- augment(fit)
sd <- sd(resid$.resid)
#sd= 9344.665
#y estimate= 95.186.56
y <- fc$.mean[4]
#lower
lower <- y-(1.96*sd)
#upper
upper <- y+1.96*sd
lower
## [1] 76871.01
upper
## [1] 113502.1
#r produced interval
hilo(fc) %>%
select("95%")
## # A tsibble: 4 x 2 [1M]
## `95%` Month
## <hilo> <mth>
## 1 [76854.79, 113518.3]95 2019 Jan
## 2 [75927.17, 114445.9]95 2019 Feb
## 3 [75042.22, 115330.9]95 2019 Mar
## 4 [74194.54, 116178.6]95 2019 Apr
#these are really close to the ones that I calculated by hand. This is probably a good thing.
china <- global_economy %>%
filter(Country== "China") %>%
select("Year", "Exports")
autoplot(china)
#data, which starts at 1960, shows a clear upward trend. this is due to China's aggressive policies pushing towards economic growth. The trend begins in 1970 and reaches its peak at around 2005. It looks like a decreasing trend begins at around the great recession, and it continues to decrease.
fit <- china %>%
model(
ANN = ETS(Exports~ error("A") + trend("N") + season("N"))
)
fc <- fit %>% forecast(h = 10)
fc
fc %>%
autoplot(china) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit))
accuracy(fit)
fit2 <- china %>%
model(
AAN = ETS(Exports~ error("A") + trend("A") + season("N"))
)
fc2 <- fit2 %>% forecast(h=10)
fc2
accuracy(fit2)
#it looks like the AAN model performs just slightly better. This is suprising because the data showed a clear upward trend, maybe the fact that it had a big downward trend in 2008 had a big effect on the accuracy. Still, I wouldve thougth that the additive trend would have benefitted the accuracy.
#I think the first forecast is more accurate because I do not expect China's exports to decrease as time goes on, which is what the second forecast predicts. Clearly the trend is too volatile to be used. Looking at the current data, there was actually an increase in china's exports relative to 2017.
#what do we do with the rmse values?
chin <- global_economy %>%
filter(Country=="China") %>%
select(Country, GDP)
autoplot(chin)
## Plot variable not specified, automatically selected `.vars = GDP`
chinbc <- chin %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
chin%>%
autoplot(box_cox(GDP, chinbc))
fit3 <- chin %>%
model(ETS(GDP))
fc3 <- fit3 %>%
forecast(h = 20)
report(fit3)
## Series: GDP
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.9998998
## beta = 0.3119984
##
## Initial states:
## l[0] b[0]
## 45713434615 3288256682
##
## sigma^2: 0.0108
##
## AIC AICc BIC
## 3102.064 3103.218 3112.366
fc3 %>%
autoplot(chin) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit3))
fitbc <- chin %>%
model(ETS(box_cox(GDP,chinbc)))
fcbc <- fitbc %>%
forecast(h=20)
report(fitbc)
## Series: GDP
## Model: ETS(A,A,N)
## Transformation: box_cox(GDP, chinbc)
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.07258856
##
## Initial states:
## l[0] b[0]
## 16.64919 0.02639696
##
## sigma^2: 0.0014
##
## AIC AICc BIC
## -138.5192 -137.3653 -128.2170
fcbc %>%
autoplot(chin) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fitbc))
fit3a <- chin %>%
model(ETS(GDP~error("A")+trend("Ad")+season("N")))
fc3a <- fit3a %>%
forecast(h=20)
report(fit3a)
## Series: GDP
## Model: ETS(A,Ad,N)
## Smoothing parameters:
## alpha = 0.9998747
## beta = 0.5634078
## phi = 0.9799999
##
## Initial states:
## l[0] b[0]
## 50284778075 3288256683
##
## sigma^2: 3.959258e+22
##
## AIC AICc BIC
## 3260.187 3261.834 3272.549
fc3a %>%
autoplot(chin) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit3a))
#Holy shit multiplicative errors really do widen your prediction intervals. The box cox model has the lowest Aicc, however I'm not sure that I would use the multiplicative errors, because GDP isnt seasonal. I think the following model will be the best.
fitbc2 <- chin %>%
model(ETS(box_cox(GDP,chinbc)~error("A")+trend("Ad")+season("N")))
fcbc2 <- fitbc2 %>%
forecast(h=20)
report(fitbc2)
## Series: GDP
## Model: ETS(A,Ad,N)
## Transformation: box_cox(GDP, chinbc)
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.3554223
## phi = 0.9200943
##
## Initial states:
## l[0] b[0]
## 16.69879 -0.02541932
##
## sigma^2: 0.0015
##
## AIC AICc BIC
## -134.8815 -133.2345 -122.5189
fcbc2 %>%
autoplot(chin)+
geom_line(aes(y=.fitted),col="green",
data=augment(fitbc2))
#even with the additive errors my prediction interval grows so much over time... I trust this one the most though for sure.
For more math examples, please check this website
gas <- aus_production %>%
select(Gas)
autoplot(gas)
## Plot variable not specified, automatically selected `.vars = Gas`
#clearly needs a transformation
gaslambda <- gas %>%
features(Gas, features=guerrero) %>%
pull(lambda_guerrero)
gas %>%
autoplot(box_cox(Gas, gaslambda))
#fitted values/ model
gasfit <- gas %>%
model(ETS(box_cox(Gas,gaslambda)))
gg_tsresiduals(gasfit)
#autocorrelation, not normally distributed.
report(gasfit)
## Series: Gas
## Model: ETS(A,A,A)
## Transformation: box_cox(Gas, gaslambda)
## Smoothing parameters:
## alpha = 0.8678852
## beta = 0.05714432
## gamma = 0.0334369
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 1.935509 0.01219028 -0.1040305 0.2453879 0.1145641 -0.2559215
##
## sigma^2: 0.0064
##
## AIC AICc BIC
## 81.64539 82.51077 112.10584
gasfc <- gasfit %>%
forecast(h=36)
gasfc %>%
autoplot(gas)+
geom_line(aes(y=.fitted), color = "green",
data=augment(gasfit))
#if i dont do the box cox, we need to do multiplicative because the level of the seasonality changes over time.
gasfit1 <- gas %>%
model(ETS(Gas))
gg_tsresiduals(gasfit1)
#a little heteroskedasticity, a lot of acf, but it looks normally distributed.
report(gasfit1)
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
gasfc1 <- gasfit1 %>%
forecast(h=36)
gasfc1 %>%
autoplot(gas)+
geom_line(aes(y=.fitted), color = "green",
data=augment(gasfit1))
gasfit2 <- gas %>%
model(ETS(Gas~error("A")+trend("Ad")+season("M")))
gg_tsresiduals(gasfit2)
report(gasfit2)
## Series: Gas
## Model: ETS(A,Ad,M)
## Smoothing parameters:
## alpha = 0.6100606
## beta = 0.02485618
## gamma = 0.2225287
## phi = 0.98
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.643513 -0.06179413 0.9254594 1.060244 1.124238 0.8900577
##
## sigma^2: 18.5452
##
## AIC AICc BIC
## 1821.235 1822.297 1855.080
#there is severe heteroskedasticity.not normally distributed. not much autocorrelation tho.
gasfc2 <- gasfit2 %>%
forecast(h=36)
gasfc2 %>%
autoplot(gas)+
geom_line(aes(y=.fitted), color="green",
data=augment(gasfit2))
###5
set.seed(123333)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries %>% autoplot(Turnover)
#we need to use multiplicative because the level of the seasonality changes with the data.
fitseries <- myseries %>%
model(ETS(Turnover ~ error("M") + trend("A") + season("M")))
fcseries <- fitseries %>%
forecast(h=36)
fcseries %>%
autoplot(myseries)+
geom_line(aes(y=.fitted), color="green",
data=augment(fitseries))
report(fitseries)
## Series: Turnover
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.4385116
## beta = 0.0001345133
## gamma = 0.0001015592
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 35.70144 0.4268796 0.9768357 0.8963407 0.9465278 1.352665 1.046269 0.9954085
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.9707365 0.9830932 0.9726422 0.939769 0.9785438 0.9411683
##
## sigma^2: 0.0013
##
## AIC AICc BIC
## 3976.603 3978.050 4046.116
fitseries2 <- myseries %>%
model(ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
fcseries2 <- fitseries2 %>%
forecast(h=36)
fcseries2 %>%
autoplot(myseries)+
geom_line(aes(y=.fitted), color="green",
data=augment(fitseries2))
report(fitseries2)
## Series: Turnover
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.458572
## beta = 0.01128426
## gamma = 0.0001001574
## phi = 0.9772211
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 36.02009 0.4814974 0.9821001 0.8995871 0.9443604 1.347288 1.040544 0.9938802
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.9702889 0.9820467 0.9729883 0.9434179 0.9817436 0.941755
##
## sigma^2: 0.0013
##
## AIC AICc BIC
## 3989.712 3991.333 4063.315
accuracy(fitseries)
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Sout… Other r… "ETS(… Trai… 0.152 5.45 3.80 -0.0486 2.82 0.458 0.508 0.0440
#RMSE= 5.451617
accuracy(fitseries2)
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South … Other r… "ETS(… Trai… 0.482 5.48 3.82 0.325 2.84 0.461 0.511 0.0207
#rsme =5.478495
#i prefer the one with the lower RMSE(1) but Its not a big difference.
gg_tsresiduals(fitseries)
#looks good to be except we have low levels of ac.
trainms <- myseries %>%
slice(1:346)
fitms <- trainms %>%
model(ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
fcms <- fitms %>%
forecast(h=36)
testms <- myseries %>%
slice(347:441)
accuracy(fcms,testms)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Tur… Sout… Other r… Test 1.84 8.56 6.41 0.913 2.92 NaN NaN 0.225
#8.556635
snfitms <- trainms %>%
model(SNAIVE(Turnover))
fcsnfitms <- snfitms %>%
forecast(h=36)
accuracy(fcsnfitms,testms)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE… Sout… Other r… Test -0.314 7.93 6.27 -0.275 2.92 NaN NaN 0.213
#7.932966
#looks like the snaive is a better model.
sumtourism <- tourism %>%
summarise(Trips=sum(Trips))
autoplot(sumtourism)
## Plot variable not specified, automatically selected `.vars = Trips`
sumtourism
## # A tsibble: 80 x 2 [1Q]
## Quarter Trips
## <qtr> <dbl>
## 1 1998 Q1 23182.
## 2 1998 Q2 20323.
## 3 1998 Q3 19827.
## 4 1998 Q4 20830.
## 5 1999 Q1 22087.
## 6 1999 Q2 21458.
## 7 1999 Q3 19914.
## 8 1999 Q4 20028.
## 9 2000 Q1 22339.
## 10 2000 Q2 19941.
## # … with 70 more rows
sumtourism %>%
model(
STL(Trips ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) %>%
components() %>%
autoplot()
#clear seasonal and trend.
stletsdamped <- sumtourism %>% model(decomposition_model(
STL(Trips),
ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))))
accuracy(stletsdamped)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "decomposition_model(… Trai… 103. 763. 576. 0.367 2.72 0.607 0.629 -0.0174
#763
fcstl <- stletsdamped %>%
forecast(h= 8)
fcstl %>%
autoplot(sumtourism)+
geom_line(aes(y=.fitted), color="green",
data=augment(stletsdamped))
## Warning: Removed 4 row(s) containing missing values (geom_path).
stletsdampeda <- sumtourism %>% model(decomposition_model(
STL(Trips),
ETS(season_adjust ~ error("A") + trend("A") + season("N"))))
accuracy(stletsdampeda)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "decomposition_model(… Trai… 99.7 763. 574. 0.359 2.71 0.604 0.628 -0.0182
#762
fcstla <- stletsdampeda %>%
forecast(h=8)
fcstla %>%
autoplot(sumtourism)+
geom_line(aes(y=.fitted), color="green",
data=augment(stletsdampeda))
## Warning: Removed 4 row(s) containing missing values (geom_path).
blah <- sumtourism %>%
model(ETS(Trips))
report(blah)
## Series: Trips
## Model: ETS(A,A,A)
## Smoothing parameters:
## alpha = 0.4495675
## beta = 0.04450178
## gamma = 0.0001000075
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 21689.64 -58.46946 -125.8548 -816.3416 -324.5553 1266.752
##
## sigma^2: 699901.4
##
## AIC AICc BIC
## 1436.829 1439.400 1458.267
#aaa.
accuracy(blah)
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Trips) Training 105. 794. 604. 0.379 2.86 0.636 0.653 -0.00151
fcblah <- blah %>%
forecast(h=8)
fcblah %>%
autoplot(sumtourism)+
geom_line(aes(y=.fitted), color="green",
data=augment(blah))
#793.6695.
#looks like the decomposed model is better by like 30 points.
#out of all the graphs, I think the first one with the damped trend is the best because it fits the values much better.
###7
new <- aus_arrivals %>%
filter(Origin=="NZ")
autoplot(new)
## Plot variable not specified, automatically selected `.vars = Arrivals`
view(new)
#upward trend, seasonality is increasing with time.
train <- new %>%
slice(1:119)
autoplot(train)
## Plot variable not specified, automatically selected `.vars = Arrivals`
fittrain <- train %>%
model(ETS(Arrivals~error("M")+trend("A")+season("M")))
fctrain <- fittrain %>%
forecast(h=8)
fctrain%>%
autoplot(new)+
geom_line(aes(y=.fitted), color="green",
data=augment(fittrain))
#it is needed because the lvl of seasonality changes over time.
#i) an ETS model; ii) a ETS model applied to a log transformed series; iii) a seasonal naive method; iv) an STL decomposition applied to the log transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data.
sestrain <- train %>%
model(ETS(Arrivals~error("A")+trend("N")+season("N")))
report(sestrain)
## Series: Arrivals
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.2580434
##
## Initial states:
## l[0]
## 64589.28
##
## sigma^2: 1026511786
##
## AIC AICc BIC
## 3041.881 3042.090 3050.219
fcsestrain <- sestrain %>%
forecast(h=8)
fcsestrain %>%
autoplot(new, level = NULL)+
geom_line(aes(y=.fitted), color="green",
data=augment(sestrain))
logtrain <- train %>%
model(ETS(log(Arrivals)~error("A")+trend("N")+season("N")))
report(logtrain)
## Series: Arrivals
## Model: ETS(A,N,N)
## Transformation: log(Arrivals)
## Smoothing parameters:
## alpha = 0.280708
##
## Initial states:
## l[0]
## 11.0499
##
## sigma^2: 0.0496
##
## AIC AICc BIC
## 215.2636 215.4723 223.6010
fclogtrain <- logtrain %>%
forecast(h=8)
fclogtrain %>%
autoplot(new, level = NULL)+
geom_line(aes(y=.fitted), color="green",
data=augment(logtrain))
sntrain <- train %>%
model(SNAIVE(Arrivals))
fcsntrain <- sntrain %>%
forecast(h=8)
fcsntrain %>%
autoplot(new, level = NULL)+
geom_line(aes(y=.fitted), color="green",
data=augment(sntrain))
## Warning: Removed 4 row(s) containing missing values (geom_path).
weird <- train %>%
model(decomposition_model(STL(log(Arrivals)),ETS(season_adjust)))
fcweird <- weird %>%
forecast(h=8)
fcweird %>%
autoplot(new, level = NULL)+
geom_line(aes(y=.fitted), color="green",
data=augment(weird))
## Warning: Removed 4 row(s) containing missing values (geom_path).
report(sestrain)#3042
## Series: Arrivals
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.2580434
##
## Initial states:
## l[0]
## 64589.28
##
## sigma^2: 1026511786
##
## AIC AICc BIC
## 3041.881 3042.090 3050.219
report(logtrain)#215
## Series: Arrivals
## Model: ETS(A,N,N)
## Transformation: log(Arrivals)
## Smoothing parameters:
## alpha = 0.280708
##
## Initial states:
## l[0]
## 11.0499
##
## sigma^2: 0.0496
##
## AIC AICc BIC
## 215.2636 215.4723 223.6010
report(sntrain)
## Series: Arrivals
## Model: SNAIVE
##
## sigma^2: 321651340.4978
report(weird)#-44
## Series: Arrivals
## Model: STL decomposition model
## Transformation: log(Arrivals)
## Combination: season_adjust + season_year
##
## ========================================
##
## Series: season_adjust
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.8101279
## beta = 0.0001000026
##
## Initial states:
## l[0] b[0]
## 11.11081 0.01153924
##
## sigma^2: 0.0055
##
## AIC AICc BIC
## -44.78504 -44.25407 -30.88942
##
## Series: season_year
## Model: SNAIVE
##
## sigma^2: 0
gg_tsresiduals(sestrain)
gg_tsresiduals(logtrain)
gg_tsresiduals(sntrain)
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
gg_tsresiduals(weird)
## Warning: Removed 4 row(s) containing missing values (geom_path).
## Warning: Removed 4 rows containing missing values (geom_point).
## Warning: Removed 4 rows containing non-finite values (stat_bin).
#the 4th one is definetly best because it has the lowest aicc and its residuals resemble wn.
cv <- new %>%
stretch_tsibble(.init = 36, .step = 3)
cv
## # A tsibble: 2,511 x 4 [1Q]
## # Key: .id, Origin [31]
## Quarter Origin Arrivals .id
## <qtr> <chr> <int> <int>
## 1 1981 Q1 NZ 49140 1
## 2 1981 Q2 NZ 87467 1
## 3 1981 Q3 NZ 85841 1
## 4 1981 Q4 NZ 61882 1
## 5 1982 Q1 NZ 42045 1
## 6 1982 Q2 NZ 63081 1
## 7 1982 Q3 NZ 73275 1
## 8 1982 Q4 NZ 54808 1
## 9 1983 Q1 NZ 41030 1
## 10 1983 Q2 NZ 56155 1
## # … with 2,501 more rows
cvfit <- cv %>% model(ETS(Arrivals))
cvfc <- cvfit %>%
forecast(h=3)
accuracy(cvfit)
## # A tibble: 31 × 12
## .id Origin .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <int> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 NZ ETS(Ar… Trai… 1879. 9578. 7477. 1.41 8.94 0.479 0.483 -0.0211
## 2 2 NZ ETS(Ar… Trai… 1147. 9371. 7352. 0.709 8.82 0.499 0.493 -0.0439
## 3 3 NZ ETS(Ar… Trai… 1391. 9832. 7833. 0.933 9.18 0.542 0.529 0.0369
## 4 4 NZ ETS(Ar… Trai… 1355. 9947. 8013. 0.767 9.17 0.541 0.530 -0.0619
## 5 5 NZ ETS(Ar… Trai… 1624. 10433. 8318. 0.918 9.24 0.563 0.561 -0.0691
## 6 6 NZ ETS(Ar… Trai… 1516. 10221. 8209. 0.845 8.98 0.545 0.548 -0.0682
## 7 7 NZ ETS(Ar… Trai… 1066. 10181. 8071. 0.458 8.77 0.556 0.560 -0.0565
## 8 8 NZ ETS(Ar… Trai… 1209. 10154. 7986. 0.771 8.69 0.574 0.574 -0.0797
## 9 9 NZ ETS(Ar… Trai… 1273. 9880. 7731. 0.821 8.36 0.548 0.559 -0.0794
## 10 10 NZ ETS(Ar… Trai… 1701. 10451. 7995. 1.19 8.47 0.522 0.545 -0.0466
## # … with 21 more rows
cvfitlog <- cv %>% model(ETS(log(Arrivals) ~ error("A") +trend("A") + season("A")))
cvfclog <- cvfitlog %>%
forecast(h=3)
accuracy(cvfitlog)
## # A tibble: 31 × 12
## .id Origin .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <int> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 NZ "ETS(log(Ar… Trai… -883. 10340. 7635. -0.457 8.85 0.489 0.522
## 2 2 NZ "ETS(log(Ar… Trai… -917. 9897. 7228. -0.535 8.39 0.491 0.520
## 3 3 NZ "ETS(log(Ar… Trai… -96.4 9934. 8056. -0.591 9.41 0.558 0.535
## 4 4 NZ "ETS(log(Ar… Trai… 298. 9731. 7915. -0.379 9.11 0.535 0.519
## 5 5 NZ "ETS(log(Ar… Trai… 35.2 10672. 8275. -0.0333 9.20 0.560 0.574
## 6 6 NZ "ETS(log(Ar… Trai… 47.4 10175. 8172. -0.562 9.10 0.543 0.546
## 7 7 NZ "ETS(log(Ar… Trai… 58.8 10233. 8308. -0.605 9.15 0.573 0.563
## 8 8 NZ "ETS(log(Ar… Trai… -195. 10200. 8237. -0.762 8.99 0.592 0.577
## 9 9 NZ "ETS(log(Ar… Trai… 258. 9781. 7663. -0.245 8.31 0.543 0.553
## 10 10 NZ "ETS(log(Ar… Trai… 43.8 10335. 7996. -0.524 8.51 0.522 0.539
## # … with 21 more rows, and 1 more variable: ACF1 <dbl>
cvfitsn <- cv %>% model(SNAIVE(Arrivals))
cvfcsn <- cvfitsn %>%
forecast(h=3)
accuracy(cvfitsn)
## # A tibble: 31 × 12
## .id Origin .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <int> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 NZ SNAIVE(… Trai… 5156. 19812. 15599. 3.84 17.2 1 1 0.670
## 2 2 NZ SNAIVE(… Trai… 4255. 19020. 14721. 3.02 16.2 1 1 0.678
## 3 3 NZ SNAIVE(… Trai… 4031. 18575. 14450. 2.96 15.8 1 1 0.654
## 4 4 NZ SNAIVE(… Trai… 4429. 18751. 14802. 3.00 15.8 1 1 0.604
## 5 5 NZ SNAIVE(… Trai… 3709. 18584. 14775. 2.44 15.5 1 1 0.583
## 6 6 NZ SNAIVE(… Trai… 4698. 18650. 15057. 3.28 15.6 1 1 0.589
## 7 7 NZ SNAIVE(… Trai… 4062. 18170. 14508. 2.78 14.9 1 1 0.571
## 8 8 NZ SNAIVE(… Trai… 3797. 17677. 13918. 2.64 14.3 1 1 0.578
## 9 9 NZ SNAIVE(… Trai… 4537. 17675. 14115. 3.14 14.2 1 1 0.582
## 10 10 NZ SNAIVE(… Trai… 6213 19185. 15304. 4.15 14.6 1 1 0.616
## # … with 21 more rows
cvfitdc <- cv %>% model(decomposition_model(STL(log(Arrivals)),ETS(season_adjust)))
cvfcdc <- cvfitdc %>%
forecast(h=3)
accuracy(cvfitdc)
## # A tibble: 31 × 12
## .id Origin .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <int> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 NZ decompo… Trai… 2030. 9076. 6992. 1.75 8.31 0.448 0.458 -0.0848
## 2 2 NZ decompo… Trai… 1453. 8884. 6879. 1.10 8.15 0.467 0.467 -0.0758
## 3 3 NZ decompo… Trai… 1528. 8995. 7072. 1.23 8.23 0.489 0.484 -0.0932
## 4 4 NZ decompo… Trai… 1469. 9279. 7485. 1.05 8.46 0.506 0.495 -0.141
## 5 5 NZ decompo… Trai… 1800. 9432. 7425. 1.34 8.18 0.503 0.508 -0.129
## 6 6 NZ decompo… Trai… 1786. 9125. 7070. 1.31 7.75 0.470 0.489 -0.132
## 7 7 NZ decompo… Trai… 1370. 8875. 6857. 1.00 7.48 0.473 0.488 -0.124
## 8 8 NZ decompo… Trai… 1568. 8715. 6801. 1.19 7.34 0.489 0.493 -0.142
## 9 9 NZ decompo… Trai… 1643. 8492. 6590. 1.25 7.05 0.467 0.480 -0.142
## 10 10 NZ decompo… Trai… 2129. 9115. 6875. 1.57 7.13 0.449 0.475 -0.143
## # … with 21 more rows
#it looks like the best one is the additive log.
###8
port <- aus_production %>%
select(Cement)
port
## # A tsibble: 218 x 2 [1Q]
## Cement Quarter
## <dbl> <qtr>
## 1 465 1956 Q1
## 2 532 1956 Q2
## 3 561 1956 Q3
## 4 570 1956 Q4
## 5 529 1957 Q1
## 6 604 1957 Q2
## 7 603 1957 Q3
## 8 582 1957 Q4
## 9 554 1958 Q1
## 10 620 1958 Q2
## # … with 208 more rows
cv1 <- port %>%
stretch_tsibble(.init = 5, .step = 1)
cvets <- cv1 %>%
model(ETS(Cement))
fccvets <- cvets %>%
forecast(h=4)
fccvets
## # A fable: 856 x 5 [1Q]
## # Key: .id, .model [214]
## .id .model Quarter Cement .mean
## <int> <chr> <qtr> <dist> <dbl>
## 1 1 ETS(Cement) 1957 Q2 N(531, 2261) 531.
## 2 1 ETS(Cement) 1957 Q3 N(531, 2261) 531.
## 3 1 ETS(Cement) 1957 Q4 N(531, 2261) 531.
## 4 1 ETS(Cement) 1958 Q1 N(531, 2261) 531.
## 5 2 ETS(Cement) 1957 Q3 N(543, 2794) 543.
## 6 2 ETS(Cement) 1957 Q4 N(543, 2794) 543.
## 7 2 ETS(Cement) 1958 Q1 N(543, 2794) 543.
## 8 2 ETS(Cement) 1958 Q2 N(543, 2794) 543.
## 9 3 ETS(Cement) 1957 Q4 N(552, 2842) 552.
## 10 3 ETS(Cement) 1958 Q1 N(552, 2842) 552.
## # … with 846 more rows
report(cvets)
## Warning in report.mdl_df(cvets): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 214 × 10
## .id .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 ETS(Cement) 0.00801 -22.1 50.1 74.1 48.9 1356. 913. 0.0518
## 2 2 ETS(Cement) 0.00947 -28.0 61.9 73.9 61.3 1863. 1371. 0.0641
## 3 3 ETS(Cement) 0.00933 -33.5 72.9 80.9 72.8 2030. 1461. 0.0673
## 4 4 ETS(Cement) 2135. -37.8 81.7 87.7 81.9 1601. 2714. 35.2
## 5 5 ETS(Cement) 0.00694 -43.3 92.5 97.3 93.1 1667. 1136. 0.0569
## 6 6 ETS(Cement) 0.00742 -49.2 104. 108. 105. 1874. 1321. 0.0601
## 7 7 ETS(Cement) 2093. -54.1 114. 118. 115. 1712. 2680. 36.5
## 8 8 ETS(Cement) 1884. -59.1 124. 127. 126. 1570. 2631. 33.8
## 9 9 ETS(Cement) 2069. -65.2 136. 139. 138. 1751. 2373. 38.3
## 10 10 ETS(Cement) 0.00426 -67.0 144. 151. 147. 1032. 993. 0.0474
## # … with 204 more rows
cvsn <- cv1 %>%
model(SNAIVE(Cement))
fccvsn <- cvsn %>%
forecast(h=4)
fccvsn
## # A fable: 856 x 5 [1Q]
## # Key: .id, .model [214]
## .id .model Quarter Cement .mean
## <int> <chr> <qtr> <dist> <dbl>
## 1 1 SNAIVE(Cement) 1957 Q2 N(532, 4096) 532
## 2 1 SNAIVE(Cement) 1957 Q3 N(561, 4096) 561
## 3 1 SNAIVE(Cement) 1957 Q4 N(570, 4096) 570
## 4 1 SNAIVE(Cement) 1958 Q1 N(529, 4096) 529
## 5 2 SNAIVE(Cement) 1957 Q3 N(561, 4640) 561
## 6 2 SNAIVE(Cement) 1957 Q4 N(570, 4640) 570
## 7 2 SNAIVE(Cement) 1958 Q1 N(529, 4640) 529
## 8 2 SNAIVE(Cement) 1958 Q2 N(604, 4640) 604
## 9 3 SNAIVE(Cement) 1957 Q4 N(570, 3681) 570
## 10 3 SNAIVE(Cement) 1958 Q1 N(529, 3681) 529
## # … with 846 more rows
report(cvsn)
## Warning in report.mdl_df(cvsn): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 214 × 3
## .id .model sigma2
## <int> <chr> <dbl>
## 1 1 SNAIVE(Cement) NA
## 2 2 SNAIVE(Cement) 32
## 3 3 SNAIVE(Cement) 241.
## 4 4 SNAIVE(Cement) 721
## 5 5 SNAIVE(Cement) 642
## 6 6 SNAIVE(Cement) 635.
## 7 7 SNAIVE(Cement) 532.
## 8 8 SNAIVE(Cement) 488.
## 9 9 SNAIVE(Cement) 481
## 10 10 SNAIVE(Cement) 448.
## # … with 204 more rows
#b LOL!!!!!
port2 <- port %>%
slice(1:(n()-4)) %>%
stretch_tsibble(.init = 5, .step = 4)
portcomp2 <- port2 %>%
model(ETS(Cement),
SNAIVE(Cement)) %>%
forecast(h = "1 year")
cement_mse <- accuracy(portcomp2, port) %>%
mutate(MSE = (RMSE)^2) %>%
select(MSE, RMSE)
cement_mse
## # A tibble: 2 × 2
## MSE RMSE
## <dbl> <dbl>
## 1 9781. 98.9
## 2 17839. 134.
cement_mse %>%
add_column(Model = c('ETS', 'SNAIVE'))
## # A tibble: 2 × 3
## MSE RMSE Model
## <dbl> <dbl> <chr>
## 1 9781. 98.9 ETS
## 2 17839. 134. SNAIVE
cement_final <- cement_mse %>%
add_column(Model = c('ETS', 'SNAIVE')) %>%
select(Model, MSE, RMSE)