library(fpp3)
## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --
## v tibble 3.1.4 v tsibble 1.0.1
## v dplyr 1.0.7 v tsibbledata 0.3.0
## v tidyr 1.1.3 v feasts 0.2.2
## v lubridate 1.7.10 v fable 0.3.1
## v ggplot2 3.3.5
## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks base::setdiff()
## x tsibble::union() masks base::union()
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## -- Attaching packages ---------------------------------------------- fpp2 2.4 --
## v forecast 8.15 v expsmooth 2.3
## v fma 2.4
##
##
## Attaching package: 'fpp2'
## The following object is masked from 'package:fpp3':
##
## insurance
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v readr 2.0.1 v stringr 1.4.0
## v purrr 0.3.4 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x lubridate::as.difftime() masks base::as.difftime()
## x lubridate::date() masks base::date()
## x dplyr::filter() masks stats::filter()
## x tsibble::intersect() masks lubridate::intersect(), base::intersect()
## x tsibble::interval() masks lubridate::interval()
## x dplyr::lag() masks stats::lag()
## x tsibble::setdiff() masks lubridate::setdiff(), base::setdiff()
## x tsibble::union() masks lubridate::union(), base::union()
library(dplyr)
library(latex2exp)
library(ggplot2)
library(fabletools)
aus_livestock %>%
distinct(Animal)
## # A tibble: 7 x 1
## Animal
## <fct>
## 1 Bulls, bullocks and steers
## 2 Calves
## 3 Cattle (excl. calves)
## 4 Cows and heifers
## 5 Lambs
## 6 Pigs
## 7 Sheep
aus_livestock %>%
distinct(State)
## # A tibble: 8 x 1
## State
## <fct>
## 1 Australian Capital Territory
## 2 New South Wales
## 3 Northern Territory
## 4 Queensland
## 5 South Australia
## 6 Tasmania
## 7 Victoria
## 8 Western Australia
head(aus_livestock)
## # A tsibble: 6 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory 2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory 2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory 2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory 1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory 2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory 1800
aus_livestock %>%
filter(State=="Victoria" & Animal=="Pigs") %>%
autoplot(Count)+ ggtitle("Pigs in Victoria")
Pigs<-aus_livestock %>%
filter(State=="Victoria" & Animal=="Pigs")
FORECAST :
fit <- as_tsibble(Pigs) %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
#fit1<-ses(Pigs,h=4)
fc<-fit %>%
forecast(h = 4)
print(tidy(fit))
## # A tibble: 2 x 5
## Animal State .model term estimate
## <fct> <fct> <chr> <chr> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + se~ alpha 3.22e-1
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + se~ l[0] 1.01e+5
DISCUSSION:
Parameters
alpha=.322 l0=1.01e+5
fc %>%
autoplot(Pigs) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit)) +
labs(y="Count", title="Victoria Pigs") +
guides(colour = "none")
DISCUSSION:
Black line represents data, red line is one step ahead.
h = unpack_hilo(hilo(fc, 95) , "95%" )
View(h)
h[1,]
## # A tsibble: 1 x 8 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month Count .mean `95%_lower` `95%_upper`
## <fct> <fct> <chr> <mth> <dist> <dbl> <dbl> <dbl>
## 1 Pigs Victoria "ETS(~ 2019 Jan N(95187, 8.7e+07) 95187. 76855. 113518.
h$Count
## <distribution[4]>
## [1] N(95187, 8.7e+07) N(95187, 9.7e+07) N(95187, 1.1e+08) N(95187, 1.1e+08)
So the r generated interval is: [76855,113518]
Now, manually calculate.
# mean of 1st forecast is 95186.56
(m<-95186.56)
## [1] 95186.56
#get sigma
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
#sigma^2: 87480760
(sigma<-84780760^.5)
## [1] 9207.647
#Lower bound
m-(1.96*sigma)
## [1] 77139.57
#upper bound
m+(1.96*sigma)
## [1] 113233.5
DISCUSSION: r generated 95% CI: [76855,113518] manual 95% CI: [77140,113234]
The two intervals are reasonably close. The manual CI bounds are closer together.
#Choose China
global_economy%>%distinct(Country)
## # A tibble: 263 x 1
## Country
## <fct>
## 1 Afghanistan
## 2 Albania
## 3 Algeria
## 4 American Samoa
## 5 Andorra
## 6 Angola
## 7 Antigua and Barbuda
## 8 Arab World
## 9 Argentina
## 10 Armenia
## # ... with 253 more rows
China_economy<-global_economy%>%
select("Year", Exports) %>%
filter(Country=="China")
autoplot(China_economy)+ggtitle("Timeplot: China Export")
## Plot variable not specified, automatically selected `.vars = Exports`
DISCUSSION:
The Chinese exports appear to have increasing trend until 2006 with a possible correction and decrease
#Summary of US exports
summary(China_economy$Exports)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.492 4.357 13.048 14.117 20.748 36.035
#STL decomposition
dcmp<-China_economy %>% model(stl=STL(Exports))
components(dcmp)
## # A dable: 58 x 7 [1Y]
## # Key: Country, .model [1]
## # : Exports = trend + remainder
## Country .model Year Exports trend remainder season_adjust
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China stl 1960 4.31 4.21 0.100 4.31
## 2 China stl 1961 3.87 4.09 -0.223 3.87
## 3 China stl 1962 4.05 3.98 0.0704 4.05
## 4 China stl 1963 4.01 3.87 0.139 4.01
## 5 China stl 1964 3.77 3.75 0.0229 3.77
## 6 China stl 1965 3.64 3.62 0.0154 3.64
## 7 China stl 1966 3.49 3.48 0.0114 3.49
## 8 China stl 1967 3.28 3.33 -0.0488 3.28
## 9 China stl 1968 3.30 3.20 0.107 3.30
## 10 China stl 1969 3.05 3.13 -0.0821 3.05
## # ... with 48 more rows
components(dcmp) %>% as_tsibble() %>%
autoplot(Exports,colour="#D55E00")+
geom_line(aes(y=trend), colour="gray")
labs(y="Exports",title="Total China Exports")
## $y
## [1] "Exports"
##
## $title
## [1] "Total China Exports"
##
## attr(,"class")
## [1] "labels"
#red line is data, gray is trend
components(dcmp) %>% autoplot()
DISCUSSION:
The trend increases then decreases, no seasonality, and remainder terms are ok.
fit_c1 <- as_tsibble(China_economy) %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
fc_c1<-fit_c1 %>%
forecast(h = 4)
print(tidy(fit_c1))
## # A tibble: 2 x 4
## Country .model term estimate
## <fct> <chr> <chr> <dbl>
## 1 China "ETS(Exports ~ error(\"A\") + trend(\"N\") + season(\"~ alpha 1.00
## 2 China "ETS(Exports ~ error(\"A\") + trend(\"N\") + season(\"~ l[0] 4.31
DISCUSSION:
Parameters: alpha=.9998999 l[0]=4.3082414
fc_c1 %>%
autoplot(China_economy) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit_c1)) +
labs(y="Exports", title="China Exports") +
guides(colour = "none")
accuracy(fit_c1)
## # A tibble: 1 x 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China "ETS(Exports ~ ~ Trai~ 0.266 1.90 1.26 1.84 9.34 0.983 0.991 0.288
DISCUSSION:
RSME=1.900172
fit_c2 <- as_tsibble(China_economy) %>%
model(ETS(Exports ~ error("A") + trend("A") + season("N")))
fc_c2<-fit_c2 %>%
forecast(h = 4)
print(tidy(fit_c2))
## # A tibble: 4 x 4
## Country .model term estimate
## <fct> <chr> <chr> <dbl>
## 1 China "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"~ alpha 1.00
## 2 China "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"~ beta 0.0992
## 3 China "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"~ l[0] 4.04
## 4 China "ETS(Exports ~ error(\"A\") + trend(\"A\") + season(\"~ b[0] 0.100
DISCUSSION:
Parameters of ETS(A, A, N) alpha=0.99989993 beta=0.09923760 l0=4.03989974
b0=0.09995689
The second model: ETS(A,A,N) produces a forecast with a downward trend, while the ETS(A,N,N) produces a level unchanging forecast.
ETS(A,A,N) accounts for trend but may overforecast. ETS(A,N,N) may not be sophisticated enough to capture the characteristics.
fc_c2 %>%
autoplot(China_economy) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit_c1)) +
labs(y="Exports", title="China Exports") +
guides(colour = "none")
DISCUSSION:
The second model: ETS(A,A,N) produces a forecast with a downward trend, while the ETS(A,N,N) produces a level unchanging forecast.
Let’s look at RMSE:
accuracy(fit_c2)
## # A tibble: 1 x 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China "ETS(Exports~ Trai~ -0.0854 1.91 1.25 -0.169 9.57 0.973 0.995 0.232
DISCUSSION:
RMSE=1.906436
the first model RMSE=1.900172
This indicates the first model is better. MAPE also indicates the first model is better.
h_c1 = unpack_hilo(hilo(fc_c1, 95) , "95%" )
View(h_c1)
h_c1[1,]
## # A tsibble: 1 x 7 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean `95%_lower` `95%_upper`
## <fct> <chr> <dbl> <dist> <dbl> <dbl> <dbl>
## 1 China "ETS(Exports ~ error(\~ 2018 N(20, 3.7) 19.8 16.0 23.5
h_c1$Exports
## <distribution[4]>
## [1] N(20, 3.7) N(20, 7.5) N(20, 11) N(20, 15)
So the r generated interval is: [15.96718,23.54756]
Now, manually calculate.
# mean of 1st forecast is 19.75737
(m_c1<-19.75737)
## [1] 19.75737
#get sigma
report(fit_c1)
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.9998999
##
## Initial states:
## l[0]
## 4.308241
##
## sigma^2: 3.7396
##
## AIC AICc BIC
## 315.9713 316.4157 322.1526
#sigma^2: 3.7396
(sigma_c1<-3.7396^.5)
## [1] 1.933805
#Lower bound
m_c1-(1.96*sigma_c1)
## [1] 15.96711
#upper bound
m_c1+(1.96*sigma_c1)
## [1] 23.54763
DISCUSSION: ETS[A,N,N] r generated 95% CI: [15.96718,23.54756] manual 95% CI: [15.96711,23.54763]
The two intervals are very close.
Now calculate 95% for the second model ETS[A,A,N]
h_c2 = unpack_hilo(hilo(fc_c2, 95) , "95%" )
View(h_c2)
h_c2[1,]
## # A tsibble: 1 x 7 [1Y]
## # Key: Country, .model [1]
## Country .model Year Exports .mean `95%_lower` `95%_upper`
## <fct> <chr> <dbl> <dist> <dbl> <dbl> <dbl>
## 1 China "ETS(Exports ~ error(\~ 2018 N(19, 3.9) 19.4 15.5 23.2
h_c2$Exports
## <distribution[4]>
## [1] N(19, 3.9) N(19, 8.6) N(19, 14) N(18, 21)
So the r generated interval is: [15.493100,23.23803]
Now, manually calculate.
# mean of 1st forecast is 19.36556
(m_c2<-19.36556)
## [1] 19.36556
#get sigma
report(fit_c2)
## Series: Exports
## Model: ETS(A,A,N)
## Smoothing parameters:
## alpha = 0.9998999
## beta = 0.0992376
##
## Initial states:
## l[0] b[0]
## 4.0399 0.09995689
##
## sigma^2: 3.9037
##
## AIC AICc BIC
## 320.3530 321.5069 330.6552
#sigma^2: 3.9037
(sigma_c2<-3.9037^.5)
## [1] 1.975778
#Lower bound
m_c2-(1.96*sigma_c2)
## [1] 15.49303
#upper bound
m_c2+(1.96*sigma_c2)
## [1] 23.23809
DISCUSSION: ETS[A,A,N] r generated 95% CI: [15.493100,23.23803] manual 95% CI: [15.49303,23.23809]
The two intervals are very close.
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
global_economy%>%distinct(Country)
## # A tibble: 263 x 1
## Country
## <fct>
## 1 Afghanistan
## 2 Albania
## 3 Algeria
## 4 American Samoa
## 5 Andorra
## 6 Angola
## 7 Antigua and Barbuda
## 8 Arab World
## 9 Argentina
## 10 Armenia
## # ... with 253 more rows
China_economy<-global_economy%>%
select("Year", GDP) %>%
filter(Country=="China")
autoplot(China_economy)+ggtitle("Timeplot: China GDP")
## Plot variable not specified, automatically selected `.vars = GDP`
DISCUSSION:
plot indicates level to exponential growth.
Let’s take a closer peek
#Summary of US exports
summary(China_economy$Exports)
## Warning: Unknown or uninitialised column: `Exports`.
## Length Class Mode
## 0 NULL NULL
#STL decomposition
dcmp2<-China_economy %>% model(stl=STL(GDP))
components(dcmp2)
## # A dable: 58 x 7 [1Y]
## # Key: Country, .model [1]
## # : GDP = trend + remainder
## Country .model Year GDP trend remainder season_adjust
## <fct> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China stl 1960 59716467625. 49854129407. 9862338218. 59716467625.
## 2 China stl 1961 50056868958. 52325315607. -2268446650. 50056868958.
## 3 China stl 1962 47209359006. 54796501807. -7587142802. 47209359006.
## 4 China stl 1963 50706799903. 57892689218. -7185889315. 50706799903.
## 5 China stl 1964 59708343489. 61493201134. -1784857646. 59708343489.
## 6 China stl 1965 70436266147. 65616214610. 4820051537. 70436266147.
## 7 China stl 1966 76720285970. 70105946202. 6614339768. 76720285970.
## 8 China stl 1967 72881631327. 74486778685. -1605147358. 72881631327.
## 9 China stl 1968 70846535056. 79495425860. -8648890805. 70846535056.
## 10 China stl 1969 79705906247. 86406865221. -6700958973. 79705906247.
## # ... with 48 more rows
components(dcmp2) %>% as_tsibble() %>%
autoplot(GDP,colour="#D55E00")+
geom_line(aes(y=trend), colour="gray")
labs(y="GDP",title="Total China GDP")
## $y
## [1] "GDP"
##
## $title
## [1] "Total China GDP"
##
## attr(,"class")
## [1] "labels"
#red line is data, gray is trend
components(dcmp2) %>% autoplot()
Trend is upward.
Modelling as follows:
China_economy %>%
model(
`Holt's method` = ETS(GDP ~ error("A") +
trend("A") + season("N")),
`Damped Holt's method` = ETS(GDP ~ error("A") +
trend("Ad", phi = 0.9) + season("N"))
) %>%
forecast(h = 15) %>%
autoplot(China_economy, level = NULL) +
labs(title = "China GDP",
y = "GDP") +
guides(colour = guide_legend(title = "Forecast"))
DISCUSSION:
The forescasted by Holt’s method may over-forecast as seen in the plot because it is a constant increasing trend in this example.
The Damped Holt’s method brings in a parameter that dampens or flattens the trend. Let’s expand the forecast period.
China_economy %>%
model(
`Holt's method` = ETS(GDP ~ error("A") +
trend("A") + season("N")),
`Damped Holt's method` = ETS(GDP ~ error("A") +
trend("Ad", phi = 0.9) + season("N"))
) %>%
forecast(h = 30) %>%
autoplot(China_economy, level = NULL) +
labs(title = "China GDP",
y = "GDP") +
guides(colour = guide_legend(title = "Forecast"))
DISCUSSION:
With a forecast period of h=30, clearly Holt’s method and Damped Holt’s method diverge. Holt’s method continues to increase, while Damped Holt’s method flattens.
aus_production %>%
autoplot(Gas)+ ggtitle("Gas in Australia")
Plot appears to have increase trend, let’s decompose.
#STL decomposition
dcmp_g<-aus_production %>% model(stl=STL(Gas))
components(dcmp_g)
## # A dable: 218 x 7 [1Q]
## # Key: .model [1]
## # : Gas = trend + season_year + remainder
## .model Quarter Gas trend season_year remainder season_adjust
## <chr> <qtr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 stl 1956 Q1 5 5.85 -1.18 0.334 6.18
## 2 stl 1956 Q2 6 5.93 0.506 -0.439 5.49
## 3 stl 1956 Q3 7 6.02 1.10 -0.118 5.90
## 4 stl 1956 Q4 6 6.14 -0.428 0.286 6.43
## 5 stl 1957 Q1 5 6.27 -1.18 -0.0863 6.18
## 6 stl 1957 Q2 7 6.25 0.511 0.242 6.49
## 7 stl 1957 Q3 7 6.24 1.10 -0.345 5.90
## 8 stl 1957 Q4 6 6.24 -0.434 0.191 6.43
## 9 stl 1958 Q1 5 6.37 -1.18 -0.185 6.18
## 10 stl 1958 Q2 7 6.50 0.516 -0.0185 6.48
## # ... with 208 more rows
components(dcmp_g) %>% as_tsibble() %>%
autoplot(Gas,colour="#D55E00")+
geom_line(aes(y=trend), colour="gray")
labs(y="Gas",title="Total Gas")
## $y
## [1] "Gas"
##
## $title
## [1] "Total Gas"
##
## attr(,"class")
## [1] "labels"
#red line is data, gray is trend
components(dcmp_g) %>% autoplot()
DISCUSSION:
This plot shows there is increasing trend, seasonality, and increasing remainders.
fit_gas1<-aus_production %>%
model(
additive = ETS(Gas ~ error("A") +
trend("A") + season("A")),
multiplicative = ETS(Gas ~ error("A") +
trend("A") + season("M"))
)
fc_gas1<- fit_gas1 %>%
forecast(h = 15)
fc_gas1%>%
autoplot(aus_production, level = NULL) +
labs(title = "Australian Gas production",
y = "Gas") +
guides(colour = guide_legend(title = "Forecast"))
DISCUSSION:
The additive method is preferred when seasonal variations are roughly constant through the series, while the multiplicative method is preferred when the seasonal variations are changing proportional to the level of the series. Here they are increasing. Let’s look at RMSE:
accuracy(fit_gas1)
## # A tibble: 2 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 additive Training 0.00525 4.76 3.35 -4.69 10.9 0.600 0.628 0.0772
## 2 multiplicative Training 0.218 4.19 2.84 -0.920 5.03 0.510 0.553 0.0405
DISCUSSION:
The additive model RMSE=4.76. The multiplicative model RMSE=4.19.
Therefore, the multiplicative model is better.
Let’s try a damped trend next.
fit_gas2<-aus_production %>%
model(
`Damped Holt's method` = ETS(Gas ~ error("A") +
trend("Ad", phi = 0.9) + season("M")),
multiplicative = ETS(Gas ~ error("A") +
trend("A") + season("M"))
)
fc_gas2<- fit_gas2 %>%
forecast(h = 15)
fc_gas2%>%
autoplot(aus_production, level = NULL) +
labs(title = "Australian Gas production",
y = "Gas") +
guides(colour = guide_legend(title = "Forecast"))
Let’s take a look at the RMSE
accuracy(fit_gas2)
## # A tibble: 2 x 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Damped Holt's method Training 0.796 4.28 2.86 1.46 4.08 0.513 0.564 0.0134
## 2 multiplicative Training 0.218 4.19 2.84 -0.920 5.03 0.510 0.553 0.0405
DISCUSSION:
Damped RMSE=4.28 Multiplicative RMSE=4.19
This indicates the Multiplicate model is better than the damped model. The damped model does not improve the forescasts.
#Here is prior look at this series
set.seed(1111111)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries %>% distinct(Industry)
## # A tibble: 1 x 1
## Industry
## <chr>
## 1 Newspaper and book retailing
head(myseries)
## # A tsibble: 6 x 5 [1M]
## # Key: State, Industry [1]
## State Industry `Series ID` Month Turnover
## <chr> <chr> <chr> <mth> <dbl>
## 1 Western Australia Newspaper and book retailing A3349822A 1982 Apr 9.7
## 2 Western Australia Newspaper and book retailing A3349822A 1982 May 11
## 3 Western Australia Newspaper and book retailing A3349822A 1982 Jun 10.7
## 4 Western Australia Newspaper and book retailing A3349822A 1982 Jul 9
## 5 Western Australia Newspaper and book retailing A3349822A 1982 Aug 9.1
## 6 Western Australia Newspaper and book retailing A3349822A 1982 Sep 10
autoplot(myseries,.vars=Turnover)
A quick look at this timeseries shows an upward trend, with possible yearly seasonality and also possible a 10 year cyclicity. Let’s take a closer look at seasonality. Note the downward trend in 2010.
Lets decompose
#STL decomposition
dcmp_my<-myseries %>% model(stl=STL(Turnover))
components(dcmp_my)
## # A dable: 441 x 9 [1M]
## # Key: State, Industry, .model [1]
## # : Turnover = trend + season_year + remainder
## State Industry .model Month Turnover trend season_year remainder
## <chr> <chr> <chr> <mth> <dbl> <dbl> <dbl> <dbl>
## 1 Western Australia Newspap~ stl 1982 Apr 9.7 10.7 -0.991 0.00593
## 2 Western Australia Newspap~ stl 1982 May 11 10.4 0.260 0.318
## 3 Western Australia Newspap~ stl 1982 Jun 10.7 10.2 -0.815 1.36
## 4 Western Australia Newspap~ stl 1982 Jul 9 9.90 -0.631 -0.266
## 5 Western Australia Newspap~ stl 1982 Aug 9.1 9.66 0.0367 -0.597
## 6 Western Australia Newspap~ stl 1982 Sep 10 9.42 -0.712 1.29
## 7 Western Australia Newspap~ stl 1982 Oct 7.7 9.19 -0.496 -0.991
## 8 Western Australia Newspap~ stl 1982 Nov 8.4 8.97 -0.165 -0.401
## 9 Western Australia Newspap~ stl 1982 Dec 11.8 8.75 4.38 -1.32
## 10 Western Australia Newspap~ stl 1983 Jan 7.4 8.52 -0.0943 -1.03
## # ... with 431 more rows, and 1 more variable: season_adjust <dbl>
components(dcmp_my) %>% as_tsibble() %>%
autoplot(Turnover,colour="#D55E00")+
geom_line(aes(y=trend), colour="gray")
labs(y="Gas",title="Total Gas")
## $y
## [1] "Gas"
##
## $title
## [1] "Total Gas"
##
## attr(,"class")
## [1] "labels"
#red line is data, gray is trend
components(dcmp_my) %>% autoplot()
DISCUSSION:
As seen from the decomposition above, the seasonal variations are changing proportionally to the level of the series.
fit_my<-myseries %>%
model(
`Damped Holt's method` = ETS(Turnover ~ error("A") +
trend("Ad", phi = 0.9) + season("M")),
multiplicative = ETS(Turnover ~ error("A") +
trend("A") + season("M"))
)
fc_my<- fit_my %>%
forecast(h = 15)
fc_my%>%
autoplot(myseries, level = NULL) +
labs(title = "Newspaper Turnover ",
y = "Turnover") +
guides(colour = guide_legend(title = "Forecast"))
accuracy(fit_my)
## # A tibble: 2 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Wester~ Newspaper~ Damped~ Trai~ 0.0845 2.50 1.75 -0.113 6.91 0.421 0.441
## 2 Wester~ Newspaper~ multip~ Trai~ -0.00751 2.49 1.74 -0.462 6.89 0.418 0.440
## # ... with 1 more variable: ACF1 <dbl>
DISCUSSION:
Damped RSME=2.498 Multiplicative=2.49
so Multiplicative is better.
aug<-myseries %>%
model(
ETS(Turnover ~ error("A") +
trend("A") + season("M"))
)%>%
augment()
autoplot(aug,.innov)
train2011 <- myseries %>% filter(year(Month) < 2011 )
autoplot(train2011)
## Plot variable not specified, automatically selected `.vars = Turnover`
fit_e<-train2011 %>%
model(
`SNAIVE` = SNAIVE(Turnover),
multiplicative = ETS(Turnover ~ error("A") +
trend("A") + season("M"))
)
fc_e<- fit_e %>%
forecast(h = 15)
fc_e%>%
autoplot(train2011, level = NULL) +
labs(title = "Newspaper Turnover ",
y = "Turnover") +
guides(colour = guide_legend(title = "Forecast"))
RMSE
accuracy(fit_e)
## # A tibble: 2 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Wester~ Newspaper ~ SNAIVE Trai~ 0.838 5.67 4.09 2.49 16.0 1 1
## 2 Wester~ Newspaper ~ multip~ Trai~ -0.0426 2.24 1.59 -0.705 6.92 0.389 0.395
## # ... with 1 more variable: ACF1 <dbl>
DISCUSSION:
SNAIVE RMSE= 5.67 Multiplicative RMSE= 2.24
Multiplicative is better than SNAIVE.
(lambda = train2011 %>% features(Turnover, features = guerrero) %>%
pull(lambda_guerrero) )
## [1] 0.05073177
train2011$bc<-box_cox(train2011$Turnover, lambda)
#STL decomposition
dcmp_mybc<-train2011 %>% model(stl=STL(bc))
components(dcmp_mybc)
## # A dable: 345 x 9 [1M]
## # Key: State, Industry, .model [1]
## # : bc = trend + season_year + remainder
## State Industry .model Month bc trend season_year remainder
## <chr> <chr> <chr> <mth> <dbl> <dbl> <dbl> <dbl>
## 1 Western Australia Newspape~ stl 1982 Apr 2.41 2.51 -0.0904 -0.0122
## 2 Western Australia Newspape~ stl 1982 May 2.55 2.48 0.0307 0.0395
## 3 Western Australia Newspape~ stl 1982 Jun 2.52 2.45 -0.0756 0.146
## 4 Western Australia Newspape~ stl 1982 Jul 2.32 2.42 -0.0534 -0.0392
## 5 Western Australia Newspape~ stl 1982 Aug 2.34 2.39 0.0123 -0.0638
## 6 Western Australia Newspape~ stl 1982 Sep 2.44 2.36 -0.0570 0.140
## 7 Western Australia Newspape~ stl 1982 Oct 2.15 2.33 -0.0455 -0.135
## 8 Western Australia Newspape~ stl 1982 Nov 2.25 2.30 -0.00350 -0.0527
## 9 Western Australia Newspape~ stl 1982 Dec 2.63 2.28 0.347 0.00519
## 10 Western Australia Newspape~ stl 1983 Jan 2.11 2.25 -0.00727 -0.136
## # ... with 335 more rows, and 1 more variable: season_adjust <dbl>
components(dcmp_mybc) %>% as_tsibble() %>%
autoplot(bc,colour="#D55E00")+
geom_line(aes(y=trend), colour="gray")
labs(y="box coxTurnover",title="Total Turnover")
## $y
## [1] "box coxTurnover"
##
## $title
## [1] "Total Turnover"
##
## attr(,"class")
## [1] "labels"
#red line is data, gray is trend
components(dcmp_mybc) %>% autoplot()
We have completed the boxcox transformation and plots.
Now run models:
fit_bc<-train2011 %>%
model(ETS(bc ~ error("A") +
trend("A") + season("M")))
fc_bc<- fit_bc %>%
forecast(h = 15)
fc_bc%>%
autoplot(train2011, level = NULL) +
labs(title = "Newspaper Turnover ",
y = "Turnover transformed") +
guides(colour = guide_legend(title = "Forecast"))
accuracy(fit_bc
)
## # A tibble: 1 x 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Weste~ Newspape~ "ETS(bc~ Trai~ 0.00127 0.105 0.0788 -0.0171 2.45 0.406 0.435
## # ... with 1 more variable: ACF1 <dbl>
The RMSE for this box cox transformed Turnover = .147925 which is the best of all previous forecast on the test sets.