8.1, 8.5, 8.6, 8.7, 8.8, 8.9
#loaded relevant libraries
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.1
## ✔ lubridate 1.9.3 ✔ fable 0.4.0
## ✔ ggplot2 3.5.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(latex2exp)
library(seasonal)
##
## Attaching package: 'seasonal'
##
## The following object is masked from 'package:tibble':
##
## view
##8.1) Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset a) Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0, and generate forecasts for the next four months.
head(aus_livestock)
## # A tsibble: 6 x 4 [1M]
## # Key: Animal, State [1]
## Month Animal State Count
## <mth> <fct> <fct> <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory 2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory 2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory 2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory 1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory 2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory 1800
unique(aus_livestock$Animal)
## [1] Bulls, bullocks and steers Calves
## [3] Cattle (excl. calves) Cows and heifers
## [5] Lambs Pigs
## [7] Sheep
## 7 Levels: Bulls, bullocks and steers Calves ... Sheep
#filter out the data needed
aus_pigs <- aus_livestock |>
filter(Animal == "Pigs", State == "Victoria")
#view the data
aus_livestock |>
filter(Animal == "Pigs", State == "Victoria") |> autoplot()
## Plot variable not specified, automatically selected `.vars = Count`
#build a model
fit <- aus_pigs |>
model(ETS(Count))
report(fit)
## Series: Count
## Model: ETS(A,N,A)
## Smoothing parameters:
## alpha = 0.3579401
## gamma = 0.0001000139
##
## Initial states:
## l[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5] s[-6]
## 95487.5 2066.235 6717.311 -2809.562 -1341.299 -7750.173 -8483.418 5610.89
## s[-7] s[-8] s[-9] s[-10] s[-11]
## -579.8107 1215.464 -2827.091 1739.465 6441.989
##
## sigma^2: 60742898
##
## AIC AICc BIC
## 13545.38 13546.26 13610.24
#model selected A,N,A
alpha = 0.3579401 gamma = 0.0001000139 l0 95487.5
#forcast with the model for 4 months
fit |>
forecast(h = 4) |>
autoplot(aus_pigs)+
labs(title="Pigs Victoria",
y="Count")
Compute a 95% prediction interval for the first forecast using y±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R
#interval produced by R
fit |>
forecast(h = 4) |>
hilo()
## # A tsibble: 4 x 8 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month
## <fct> <fct> <chr> <mth>
## 1 Pigs Victoria ETS(Count) 2019 Jan
## 2 Pigs Victoria ETS(Count) 2019 Feb
## 3 Pigs Victoria ETS(Count) 2019 Mar
## 4 Pigs Victoria ETS(Count) 2019 Apr
## # ℹ 4 more variables: Count <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
#to pick out the 95% prediction interval
fit95 <- fit |>
forecast(h = 4) |>
hilo()
#gather the residuals to compute the standard deviation
residinfo <- augment(fit)
class(residinfo)
## [1] "tbl_ts" "tbl_df" "tbl" "data.frame"
#putting the residuals into residinfores
residinfores <- residinfo$.resid
#compute the standard deviation of the residuals
sd(residinfores)
## [1] 7702.275
#7702.275
print("s=7702.275")
## [1] "s=7702.275"
print("y+1.96s")
## [1] "y+1.96s"
print(84424.71 + 1.96 * 7702.275)
## [1] 99521.17
print("y-1.96s")
## [1] "y-1.96s"
print(84424.71 - 1.96 * 7702.275)
## [1] 69328.25
print(fit95$`95%`[1])
## <hilo[1]>
## [1] [69149.19, 99700.22]95
print("percent difference of y - 1.96s compared to r generated value")
## [1] "percent difference of y - 1.96s compared to r generated value"
((69328-69149.19)/69149.19 ) * 100
## [1] 0.2585858
print("percent difference of y + 1.96s compared to r generated value")
## [1] "percent difference of y + 1.96s compared to r generated value"
((99521.17-99700.22)/99700.22 ) * 100
## [1] -0.1795884
when compared to r intervals. my computed intervals differ by about .2% to .3%
##8.5
Data set global_economy contains the annual Exports from many countries. Select one country to analyse. a) Plot the Exports series and discuss the main features of the data.
unique(global_economy$Country)
## [1] Afghanistan
## [2] Albania
## [3] Algeria
## [4] American Samoa
## [5] Andorra
## [6] Angola
## [7] Antigua and Barbuda
## [8] Arab World
## [9] Argentina
## [10] Armenia
## [11] Aruba
## [12] Australia
## [13] Austria
## [14] Azerbaijan
## [15] Bahamas, The
## [16] Bahrain
## [17] Bangladesh
## [18] Barbados
## [19] Belarus
## [20] Belgium
## [21] Belize
## [22] Benin
## [23] Bermuda
## [24] Bhutan
## [25] Bolivia
## [26] Bosnia and Herzegovina
## [27] Botswana
## [28] Brazil
## [29] British Virgin Islands
## [30] Brunei Darussalam
## [31] Bulgaria
## [32] Burkina Faso
## [33] Burundi
## [34] Cabo Verde
## [35] Cambodia
## [36] Cameroon
## [37] Canada
## [38] Caribbean small states
## [39] Cayman Islands
## [40] Central African Republic
## [41] Central Europe and the Baltics
## [42] Chad
## [43] Channel Islands
## [44] Chile
## [45] China
## [46] Colombia
## [47] Comoros
## [48] Congo, Dem. Rep.
## [49] Congo, Rep.
## [50] Costa Rica
## [51] Cote d'Ivoire
## [52] Croatia
## [53] Cuba
## [54] Curacao
## [55] Cyprus
## [56] Czech Republic
## [57] Denmark
## [58] Djibouti
## [59] Dominica
## [60] Dominican Republic
## [61] Early-demographic dividend
## [62] East Asia & Pacific
## [63] East Asia & Pacific (excluding high income)
## [64] East Asia & Pacific (IDA & IBRD countries)
## [65] Ecuador
## [66] Egypt, Arab Rep.
## [67] El Salvador
## [68] Equatorial Guinea
## [69] Eritrea
## [70] Estonia
## [71] Eswatini
## [72] Ethiopia
## [73] Euro area
## [74] Europe & Central Asia
## [75] Europe & Central Asia (excluding high income)
## [76] Europe & Central Asia (IDA & IBRD countries)
## [77] European Union
## [78] Faroe Islands
## [79] Fiji
## [80] Finland
## [81] Fragile and conflict affected situations
## [82] France
## [83] French Polynesia
## [84] Gabon
## [85] Gambia, The
## [86] Georgia
## [87] Germany
## [88] Ghana
## [89] Gibraltar
## [90] Greece
## [91] Greenland
## [92] Grenada
## [93] Guam
## [94] Guatemala
## [95] Guinea
## [96] Guinea-Bissau
## [97] Guyana
## [98] Haiti
## [99] Heavily indebted poor countries (HIPC)
## [100] High income
## [101] Honduras
## [102] Hong Kong SAR, China
## [103] Hungary
## [104] IBRD only
## [105] Iceland
## [106] IDA & IBRD total
## [107] IDA blend
## [108] IDA only
## [109] IDA total
## [110] India
## [111] Indonesia
## [112] Iran, Islamic Rep.
## [113] Iraq
## [114] Ireland
## [115] Isle of Man
## [116] Israel
## [117] Italy
## [118] Jamaica
## [119] Japan
## [120] Jordan
## [121] Kazakhstan
## [122] Kenya
## [123] Kiribati
## [124] Korea, Dem. People's Rep.
## [125] Korea, Rep.
## [126] Kosovo
## [127] Kuwait
## [128] Kyrgyz Republic
## [129] Lao PDR
## [130] Late-demographic dividend
## [131] Latin America & Caribbean
## [132] Latin America & Caribbean (excluding high income)
## [133] Latin America & the Caribbean (IDA & IBRD countries)
## [134] Latvia
## [135] Least developed countries: UN classification
## [136] Lebanon
## [137] Lesotho
## [138] Liberia
## [139] Libya
## [140] Liechtenstein
## [141] Lithuania
## [142] Low & middle income
## [143] Low income
## [144] Lower middle income
## [145] Luxembourg
## [146] Macao SAR, China
## [147] Macedonia, FYR
## [148] Madagascar
## [149] Malawi
## [150] Malaysia
## [151] Maldives
## [152] Mali
## [153] Malta
## [154] Marshall Islands
## [155] Mauritania
## [156] Mauritius
## [157] Mexico
## [158] Micronesia, Fed. Sts.
## [159] Middle East & North Africa
## [160] Middle East & North Africa (excluding high income)
## [161] Middle East & North Africa (IDA & IBRD countries)
## [162] Middle income
## [163] Moldova
## [164] Monaco
## [165] Mongolia
## [166] Montenegro
## [167] Morocco
## [168] Mozambique
## [169] Myanmar
## [170] Namibia
## [171] Nauru
## [172] Nepal
## [173] Netherlands
## [174] New Caledonia
## [175] New Zealand
## [176] Nicaragua
## [177] Niger
## [178] Nigeria
## [179] North America
## [180] Northern Mariana Islands
## [181] Norway
## [182] OECD members
## [183] Oman
## [184] Other small states
## [185] Pacific island small states
## [186] Pakistan
## [187] Palau
## [188] Panama
## [189] Papua New Guinea
## [190] Paraguay
## [191] Peru
## [192] Philippines
## [193] Poland
## [194] Portugal
## [195] Post-demographic dividend
## [196] Pre-demographic dividend
## [197] Puerto Rico
## [198] Qatar
## [199] Romania
## [200] Russian Federation
## [201] Rwanda
## [202] Samoa
## [203] San Marino
## [204] Sao Tome and Principe
## [205] Saudi Arabia
## [206] Senegal
## [207] Serbia
## [208] Seychelles
## [209] Sierra Leone
## [210] Singapore
## [211] Sint Maarten (Dutch part)
## [212] Slovak Republic
## [213] Slovenia
## [214] Small states
## [215] Solomon Islands
## [216] Somalia
## [217] South Africa
## [218] South Asia
## [219] South Asia (IDA & IBRD)
## [220] South Sudan
## [221] Spain
## [222] Sri Lanka
## [223] St. Kitts and Nevis
## [224] St. Lucia
## [225] St. Martin (French part)
## [226] St. Vincent and the Grenadines
## [227] Sub-Saharan Africa
## [228] Sub-Saharan Africa (excluding high income)
## [229] Sub-Saharan Africa (IDA & IBRD countries)
## [230] Sudan
## [231] Suriname
## [232] Sweden
## [233] Switzerland
## [234] Syrian Arab Republic
## [235] Tajikistan
## [236] Tanzania
## [237] Thailand
## [238] Timor-Leste
## [239] Togo
## [240] Tonga
## [241] Trinidad and Tobago
## [242] Tunisia
## [243] Turkey
## [244] Turkmenistan
## [245] Turks and Caicos Islands
## [246] Tuvalu
## [247] Uganda
## [248] Ukraine
## [249] United Arab Emirates
## [250] United Kingdom
## [251] United States
## [252] Upper middle income
## [253] Uruguay
## [254] Uzbekistan
## [255] Vanuatu
## [256] Venezuela, RB
## [257] Vietnam
## [258] Virgin Islands (U.S.)
## [259] West Bank and Gaza
## [260] World
## [261] Yemen, Rep.
## [262] Zambia
## [263] Zimbabwe
## 263 Levels: Afghanistan Albania Algeria American Samoa Andorra ... Zimbabwe
india_econ <- global_economy |> filter(Country == "India")
head(india_econ)
## # A tsibble: 6 x 9 [1Y]
## # Key: Country [1]
## Country Code Year GDP Growth CPI Imports Exports Population
## <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 India IND 1960 36535925031. NA 2.53 6.91 4.51 449480608
## 2 India IND 1961 38709096076. 3.72 2.57 6.02 4.35 458494963
## 3 India IND 1962 41599070242. 2.93 2.66 6.10 4.21 467852537
## 4 India IND 1963 47776000900. 5.99 2.74 5.97 4.33 477527970
## 5 India IND 1964 55726873084. 7.45 3.11 5.75 3.76 487484535
## 6 India IND 1965 58760424670. -2.64 3.40 5.27 3.34 497702365
#view India's exports
india_econ |> autoplot(Exports)
#decompose the data to view features
dcmp <- india_econ |>
model(stl = STL(Exports))
components(dcmp) |> autoplot()
The trend is the exports increase gradually with some bumps from 1960 to 2010. then it decreases after 2010.
fitann <- india_econ |>
model(
ANN = ETS(Exports ~ error("A") + trend("N") + season("N"))
)
fitann |>
forecast(h = 10) |>
autoplot(india_econ)+
labs(title="India's Exports",
y="Exports")
#fitann |> augment()
#accuracy can provide the RMSE for a model using the residuals
fitann |> accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 India ANN Training 0.251 1.19 0.798 2.05 7.25 0.983 0.991 -0.0331
fitaan <- india_econ |>
model(
AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
)
fitaan |>
forecast(h = 10) |>
autoplot(india_econ)+
labs(title="India's Exports",
y="Exports")
fitaan |> accuracy()
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 India AAN Training -0.0501 1.18 0.736 0.239 6.98 0.906 0.978 -0.0294
the AAN model follow the most recent trend of the decrease in exports from 2010 onwards. it produces a slightly lower RMSE than the ANN model. in one way the ANN model is more optimistic in the amount of exports while the AAN is more pesimistic. They both have very wide 95% intervals.
d.Compare the forecasts from both methods. Which do you think is best?
A more conservative approach might be the AAN model since it expects exports to continue to lessen instead of improving. presenting this prediction may induce a policy change. The ANN model is basically naive showing little to no change from the last value. if only judging by the RMSE then the AAN model is slightly better.
f.Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.
following the calculation from question 1
#interval produced by R
fitann |>
forecast(h = 10) |>
hilo()
## # A tsibble: 10 x 7 [1Y]
## # Key: Country, .model [1]
## Country .model Year
## <fct> <chr> <dbl>
## 1 India ANN 2018
## 2 India ANN 2019
## 3 India ANN 2020
## 4 India ANN 2021
## 5 India ANN 2022
## 6 India ANN 2023
## 7 India ANN 2024
## 8 India ANN 2025
## 9 India ANN 2026
## 10 India ANN 2027
## # ℹ 4 more variables: Exports <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
#to pick out the 95% prediction interval
fitann2 <- fitann |>
forecast(h = 10) |>
hilo()
accuracyann <- fitaan |> accuracy()
print(fitann2$`95%`[1])
## <hilo[1]>
## [1] [16.66831, 21.42248]95
#computed from +/- 1.96 * RMSE
print("computed 95% interval for A,N,N model")
## [1] "computed 95% interval for A,N,N model"
19.04539 + 1.96*accuracyann$RMSE[1]
## [1] 21.35036
19.04539 - 1.96*accuracyann$RMSE[1]
## [1] 16.74042
#interval produced by R
fitaan |>
forecast(h = 10) |>
hilo()
## # A tsibble: 10 x 7 [1Y]
## # Key: Country, .model [1]
## Country .model Year
## <fct> <chr> <dbl>
## 1 India AAN 2018
## 2 India AAN 2019
## 3 India AAN 2020
## 4 India AAN 2021
## 5 India AAN 2022
## 6 India AAN 2023
## 7 India AAN 2024
## 8 India AAN 2025
## 9 India AAN 2026
## 10 India AAN 2027
## # ℹ 4 more variables: Exports <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
#to pick out the 95% prediction interval
fitaan2 <- fitaan |>
forecast(h = 10) |>
hilo()
print(fitaan2$`95%`[1])
## <hilo[1]>
## [1] [15.79928, 20.57683]95
accuracyaan <- fitaan |> accuracy()
print("computed 95% interval for A,A,N model")
## [1] "computed 95% interval for A,A,N model"
18.18805 + 1.96*accuracyaan$RMSE[1]
## [1] 20.49302
18.18805 - 1.96*accuracyaan$RMSE[1]
## [1] 15.88308
results of the 95% intervals calculated from RMSE differ by at most 0.1/15*100 or 0.67%
[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]
#season can not be applied to this data either A or M
chinagdp <- global_economy |> filter(Country == "China") |> select(Year, GDP)
chinaaan <- chinagdp |>
model(
ANN = ETS(GDP ~ error("A") + trend("A") + season("N"))
)
chinaaan |>
forecast(h = 100) |>
autoplot(chinagdp)+
labs(title="A,A,N China's GDP",
y="GDP")
chinaaadn <- chinagdp |>
model(
AAN = ETS(GDP ~ error("A") + trend("Ad") + season("N"))
)
chinaaadn |>
forecast(h = 30) |>
autoplot(chinagdp)+
labs(title="A,Ad,N China's GDP",
y="GDP")
chinamadn <- chinagdp |>
model(
MAN = ETS(GDP ~ error("M") + trend("Ad") + season("N"))
)
chinamadn |>
forecast(h = 30) |>
autoplot(chinagdp)+
labs(title="M,Ad,N China's GDP",
y="GDP")
chinammn <- chinagdp |>
model(
MMN = ETS(GDP ~ error("M") + trend("M") + season("N"))
)
chinammn |>
forecast(h = 10) |>
autoplot(chinagdp)+
labs(title=" M,M,N China's GDP",
y="GDP")
chinaamn <- chinagdp |>
model(
AMN = ETS(GDP ~ error("A") + trend("M") + season("N"))
)
chinaamn |>
forecast(h = 10) |>
autoplot(chinagdp)+
labs(title=" A,M,N China's GDP",
y="GDP")
chinaann <- chinagdp |>
model(
ANN = ETS(GDP ~ error("A") + trend("N") + season("N"))
)
chinaann |>
forecast(h = 100) |>
autoplot(chinagdp)+
labs(title=" A,N,N China's GDP",
y="GDP")
chinamnn <- chinagdp |>
model(
MNN = ETS(GDP ~ error("M") + trend("N") + season("N"))
)
chinamnn |>
forecast(h = 100) |>
autoplot(chinagdp)+
labs(title=" M,N,N China's GDP",
y="GDP")
chinamadn |>forecast(h = 30) |> hilo()
## # A tsibble: 30 x 6 [1Y]
## # Key: .model [1]
## .model Year
## <chr> <dbl>
## 1 MAN 2018
## 2 MAN 2019
## 3 MAN 2020
## 4 MAN 2021
## 5 MAN 2022
## 6 MAN 2023
## 7 MAN 2024
## 8 MAN 2025
## 9 MAN 2026
## 10 MAN 2027
## # ℹ 20 more rows
## # ℹ 4 more variables: GDP <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
chinaaadn |>forecast(h = 30) |> hilo()
## # A tsibble: 30 x 6 [1Y]
## # Key: .model [1]
## .model Year
## <chr> <dbl>
## 1 AAN 2018
## 2 AAN 2019
## 3 AAN 2020
## 4 AAN 2021
## 5 AAN 2022
## 6 AAN 2023
## 7 AAN 2024
## 8 AAN 2025
## 9 AAN 2026
## 10 AAN 2027
## # ℹ 20 more rows
## # ℹ 4 more variables: GDP <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
an A,N,N is just a naive model. An A,A,N follows to most recent upward trend. dampening the trend in A,Ad,N at first follows the A,A,N trend but then the slope decreases or “dampens” for the longer forecasts. An M,N,N looks like naive or A,N,N but it has a wider 95% interval than A,N,N. A,Ad,N predicts a larger GDP than M,Ad,N however the predictions are very similar. M,M,N slope increases more rapidly than A,M,N. this may be due to M error emphasizing the most recent trend increase. MMN increases more rapidly and AMN
7.Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
#generating the ETS model
gasdf <- aus_production |> select(Quarter, Gas)
fitgas <- gasdf |>
model(ETS(Gas))
report(fitgas)
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
#decompose the model
components(fitgas) |>
autoplot() +
labs(title = "ETS components")
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
#using the model to forcast.
fitgas |>
forecast(h = 24) |>
autoplot(gasdf)+
labs(title="ETS model MAM gas",
y="gas")
#errors in the model, residuals
glance(fitgas)
## # A tibble: 1 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Gas) 0.00324 -831. 1681. 1682. 1711. 21.1 32.2 0.0413
report(fitgas)
## Series: Gas
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.6528545
## beta = 0.1441675
## gamma = 0.09784922
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
##
## sigma^2: 0.0032
##
## AIC AICc BIC
## 1680.929 1681.794 1711.389
fitgas |> accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ETS(Gas) Training -0.115 4.60 3.02 0.199 4.08 0.542 0.606 -0.0131
#
fitgas |> gg_tsresiduals(lag=24)
fitgas |>forecast(h = 24) |> hilo()
## # A tsibble: 24 x 6 [1Q]
## # Key: .model [1]
## .model Quarter
## <chr> <qtr>
## 1 ETS(Gas) 2010 Q3
## 2 ETS(Gas) 2010 Q4
## 3 ETS(Gas) 2011 Q1
## 4 ETS(Gas) 2011 Q2
## 5 ETS(Gas) 2011 Q3
## 6 ETS(Gas) 2011 Q4
## 7 ETS(Gas) 2012 Q1
## 8 ETS(Gas) 2012 Q2
## 9 ETS(Gas) 2012 Q3
## 10 ETS(Gas) 2012 Q4
## # ℹ 14 more rows
## # ℹ 4 more variables: Gas <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
gasmadm <- gasdf |>
model(
MAM = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
)
gasmadm |>
forecast(h = 24) |>
autoplot(gasdf)+
labs(title=" M,ad,M Gas",
y="Gas")
glance(gasmadm)
## # A tibble: 1 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MAM 0.00329 -832. 1684. 1685. 1718. 21.1 32.0 0.0417
report(gasmadm)
## Series: Gas
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.6489044
## beta = 0.1551275
## gamma = 0.09369372
## phi = 0.98
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 5.858941 0.09944006 0.9281912 1.177903 1.07678 0.8171255
##
## sigma^2: 0.0033
##
## AIC AICc BIC
## 1684.028 1685.091 1717.873
gasmadm |> accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MAM Training -0.00439 4.59 3.03 0.326 4.10 0.544 0.606 -0.0217
gasmadm |> gg_tsresiduals(lag=24)
gasmadm |>forecast(h = 24) |> hilo()
## # A tsibble: 24 x 6 [1Q]
## # Key: .model [1]
## .model Quarter
## <chr> <qtr>
## 1 MAM 2010 Q3
## 2 MAM 2010 Q4
## 3 MAM 2011 Q1
## 4 MAM 2011 Q2
## 5 MAM 2011 Q3
## 6 MAM 2011 Q4
## 7 MAM 2012 Q1
## 8 MAM 2012 Q2
## 9 MAM 2012 Q3
## 10 MAM 2012 Q4
## # ℹ 14 more rows
## # ℹ 4 more variables: Gas <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
#compare to A seasonality
gasmaa <- gasdf |>
model(
MAA = ETS(Gas ~ error("M") + trend("A") + season("A"))
)
gasmaa |>
forecast(h = 24) |>
autoplot(gasdf)+
labs(title=" M,A,A Gas",
y="Gas")
glance(gasmaa)
## # A tibble: 1 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MAA 0.0430 -1120. 2257. 2258. 2288. 40.9 64.0 0.0844
report(gasmaa)
## Series: Gas
## Model: ETS(M,A,A)
## Smoothing parameters:
## alpha = 0.291845
## beta = 0.2918449
## gamma = 0.4904944
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3]
## 12.47885 -3.669884 -9.763467 23.34845 6.323571 -19.90855
##
## sigma^2: 0.043
##
## AIC AICc BIC
## 2257.099 2257.964 2287.559
gasmaa |> accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MAA Training 0.0508 6.39 4.03 2.02 14.5 0.723 0.843 0.226
gasmaa |>forecast(h = 24) |> hilo()
## # A tsibble: 24 x 6 [1Q]
## # Key: .model [1]
## .model Quarter
## <chr> <qtr>
## 1 MAA 2010 Q3
## 2 MAA 2010 Q4
## 3 MAA 2011 Q1
## 4 MAA 2011 Q2
## 5 MAA 2011 Q3
## 6 MAA 2011 Q4
## 7 MAA 2012 Q1
## 8 MAA 2012 Q2
## 9 MAA 2012 Q3
## 10 MAA 2012 Q4
## # ℹ 14 more rows
## # ℹ 4 more variables: Gas <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
#N seasonality
gasman <- gasdf |>
model(
MAN = ETS(Gas ~ error("M") + trend("A") + season("N"))
)
gasman |>
forecast(h = 24) |>
autoplot(gasdf)+
labs(title=" M,A,N Gas",
y="Gas")
glance(gasman)
## # A tibble: 1 × 9
## .model sigma2 log_lik AIC AICc BIC MSE AMSE MAE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MAN 0.0266 -1064. 2138. 2138. 2154. 268. 249. 0.136
report(gasman)
## Series: Gas
## Model: ETS(M,A,N)
## Smoothing parameters:
## alpha = 0.111075
## beta = 0.1018672
##
## Initial states:
## l[0] b[0]
## 5.761939 0.0725193
##
## sigma^2: 0.0266
##
## AIC AICc BIC
## 2137.521 2137.804 2154.443
gasman |> accuracy()
## # A tibble: 1 × 10
## .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MAN Training -0.0787 16.4 11.9 -1.32 13.5 2.14 2.16 0.0888
gasman |>forecast(h = 24) |> hilo()
## # A tsibble: 24 x 6 [1Q]
## # Key: .model [1]
## .model Quarter
## <chr> <qtr>
## 1 MAN 2010 Q3
## 2 MAN 2010 Q4
## 3 MAN 2011 Q1
## 4 MAN 2011 Q2
## 5 MAN 2011 Q3
## 6 MAN 2011 Q4
## 7 MAN 2012 Q1
## 8 MAN 2012 Q2
## 9 MAN 2012 Q3
## 10 MAN 2012 Q4
## # ℹ 14 more rows
## # ℹ 4 more variables: Gas <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
Akaike’s Information Criterion (AIC) Mean Absolute error (MAE) MAM: AIC: 1680.929, RMSE: 4.595113, MAE: 0.04134491 MAdM: AIC: 1684.091, RMSE: 4.59184, MAE: 0.04165944 MAA: AIC: 2257.099, RMSE: 6.391643, MAE:0.08440083 MAN: AIC: 2137.521, RMSE: 16.35735, MAE: 0.1356131
Damping the trend reduces the RMSE slightly however it also raises the AIC. preference goes to the lower AIC for automated model selection. Also the mean absolute error for the undamped M,A,M model is less. However the models(M,A,M and M,Ad,M) are roughly equivalent.
the preference for Multiplicative seasonality is indicative of a seasonality that scales with the data. for an additive the seasonality has little variation despite the changing trend in the data. removing seasonality from the model drastically increases the MAE, AIC, and RMSE. changing it to additive also yields worse residuals however it is better than a model with none for seasonality.
#info from 2.10 number 7
set.seed(987654321)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
#breaking down the data
myseries |> autoplot()
## Plot variable not specified, automatically selected `.vars = Turnover`
myseries |> gg_season()
## Plot variable not specified, automatically selected `y = Turnover`
myseries |> gg_subseries()
## Plot variable not specified, automatically selected `y = Turnover`
myseries |> gg_lag()
## Plot variable not specified, automatically selected `y = Turnover`
the seasonality scales with the level or trend of the data. at the beginning when the data has low values its variation is also low. as the values increase the difference between the min and max for a year also increase. with this change in seasonality a multiplicative seasonality is necessary for this series. if the seasonality differences were consistant throughout then an additive seasonality would be more appropriate.
#multiplicative holt winters
fitmam <- myseries |> model(
multiplicative = ETS(Turnover ~ error("M") + trend("A") +
season("M"))
)
#trend damped multiplicative holt winters
fitmadm <- myseries |> model( MAdM =
ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
)
# models trained then forcast
fcmam <- fitmam |> forecast(h = "10 years")
fcmadm <- fitmadm |> forecast(h = "10 years")
#plot data with forcast
fcmam |>
autoplot(myseries) +
labs(title="retail M,A,M",
y="turnover")
fcmadm |>
autoplot(myseries) +
labs(title="retail M,Ad,M",
y="turnover")
#statistics on MAM and MAdM models
fitmam |> accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Vict… Other r… multi… Trai… -0.0589 5.13 3.67 -0.510 5.49 0.658 0.711 0.309
fitmam |> report()
## Series: Turnover
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.2977895
## beta = 0.0001025242
## gamma = 0.2942524
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 16.23251 0.1903941 0.9590884 0.9116755 0.8968424 1.306677 1.070696 1.029606
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.9450492 0.9897274 0.962153 0.9419827 1.003206 0.9832971
##
## sigma^2: 0.0054
##
## AIC AICc BIC
## 3971.986 3973.433 4041.500
fcmam |> hilo()
## # A tsibble: 120 x 8 [1M]
## # Key: State, Industry, .model [1]
## State Industry .model Month
## <chr> <chr> <chr> <mth>
## 1 Victoria Other recreational goods retailing multiplicative 2019 Jan
## 2 Victoria Other recreational goods retailing multiplicative 2019 Feb
## 3 Victoria Other recreational goods retailing multiplicative 2019 Mar
## 4 Victoria Other recreational goods retailing multiplicative 2019 Apr
## 5 Victoria Other recreational goods retailing multiplicative 2019 May
## 6 Victoria Other recreational goods retailing multiplicative 2019 Jun
## 7 Victoria Other recreational goods retailing multiplicative 2019 Jul
## 8 Victoria Other recreational goods retailing multiplicative 2019 Aug
## 9 Victoria Other recreational goods retailing multiplicative 2019 Sep
## 10 Victoria Other recreational goods retailing multiplicative 2019 Oct
## # ℹ 110 more rows
## # ℹ 4 more variables: Turnover <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
fitmadm |> accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victoria Other r… MAdM Trai… 0.344 5.16 3.69 0.232 5.49 0.660 0.716 0.306
fitmadm |> report()
## Series: Turnover
## Model: ETS(M,Ad,M)
## Smoothing parameters:
## alpha = 0.2994731
## beta = 0.003000567
## gamma = 0.3123052
## phi = 0.9799883
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 15.73264 0.1849834 0.9487417 0.9006466 0.9090399 1.320031 1.059082 1.01368
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.9311945 0.9850626 0.9719544 0.9556592 1.039885 0.9650238
##
## sigma^2: 0.0056
##
## AIC AICc BIC
## 3983.180 3984.801 4056.783
fcmadm |> hilo()
## # A tsibble: 120 x 8 [1M]
## # Key: State, Industry, .model [1]
## State Industry .model Month
## <chr> <chr> <chr> <mth>
## 1 Victoria Other recreational goods retailing MAdM 2019 Jan
## 2 Victoria Other recreational goods retailing MAdM 2019 Feb
## 3 Victoria Other recreational goods retailing MAdM 2019 Mar
## 4 Victoria Other recreational goods retailing MAdM 2019 Apr
## 5 Victoria Other recreational goods retailing MAdM 2019 May
## 6 Victoria Other recreational goods retailing MAdM 2019 Jun
## 7 Victoria Other recreational goods retailing MAdM 2019 Jul
## 8 Victoria Other recreational goods retailing MAdM 2019 Aug
## 9 Victoria Other recreational goods retailing MAdM 2019 Sep
## 10 Victoria Other recreational goods retailing MAdM 2019 Oct
## # ℹ 110 more rows
## # ℹ 4 more variables: Turnover <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
M,A,M RMSE: 5.125605 M,Ad,M RMSE: 5.159722
The RMSE is very similar between the two models. despite the M,A,M having a slightly lower RMSE I prefer the M,Ad, M model because it follows the later downward trend better.
#residuals of the MAM and MAdM models
fitmam |> gg_tsresiduals(lag=20)
fitmadm |> gg_tsresiduals(lag=20)
#checking if MAM is the best model
fitauto <- myseries |> model(ETS(Turnover))
fitauto |> report()
## Series: Turnover
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.2977895
## beta = 0.0001025242
## gamma = 0.2942524
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 16.23251 0.1903941 0.9590884 0.9116755 0.8968424 1.306677 1.070696 1.029606
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.9450492 0.9897274 0.962153 0.9419827 1.003206 0.9832971
##
## sigma^2: 0.0054
##
## AIC AICc BIC
## 3971.986 3973.433 4041.500
fitauto |> gg_tsresiduals(lag=20)
augment(fitauto) |>
features(.innov, ljung_box, lag = 20)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Victoria Other recreational goods retailing ETS(Turnover) 123. 1.11e-16
automated ETS model selects the M,A,M model.
residuals look right skewed. there are several residuals that pass the acf threshold. the autocorrelation is not close to zero for several lags. Leung box p value is significantly less than 0.05 so there is autocorrelation.
train <- myseries |>
filter_index("1982" ~ "2010")
## Warning: There were 2 warnings in `filter()`.
## The first warning was:
## ℹ In argument: `time_in(Month, ...)`.
## Caused by warning:
## ! `yearmonth()` may yield unexpected results.
## ℹ Please use arg `format` to supply formats.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
fit2010 <- train |> model(ETS(Turnover))
fc2010 <- fit2010 |> forecast(h= "9 years")
fc2010 |>
autoplot(myseries) +
labs(title="2010 train",
y="turnover")
fitsnaive <- train |>
model(SNAIVE(Turnover))
fcsnaive <- fitsnaive |> forecast(h= "9 years")
fcsnaive |>
autoplot(myseries) +
labs(title="Snaive",
y="turnover")
fitsnaive |> report()
## Series: Turnover
## Model: SNAIVE
##
## sigma^2: 41.1598
fitsnaive |> gg_tsresiduals(lag=108)
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
augment(fitsnaive) |>
features(.innov, ljung_box, lag = 108)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Victoria Other recreational goods retailing SNAIVE(Turnover) 803. 0
fit2010 |> report()
## Series: Turnover
## Model: ETS(M,A,M)
## Smoothing parameters:
## alpha = 0.2878128
## beta = 0.000100698
## gamma = 0.289654
##
## Initial states:
## l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
## 16.30871 0.2252655 0.9588057 0.8953244 0.9139321 1.337444 1.050725 1.004981
## s[-6] s[-7] s[-8] s[-9] s[-10] s[-11]
## 0.9867994 0.978916 0.9678668 0.970458 1.010486 0.9242624
##
## sigma^2: 0.0048
##
## AIC AICc BIC
## 2805.139 2807.076 2869.928
fit2010 |> gg_tsresiduals(lag=108)
augment(fit2010) |>
features(.innov, ljung_box, lag = 108)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Victoria Other recreational goods retailing ETS(Turnover) 250. 2.80e-13
well according to the p value from the ljung_box the exponential smoothing model is less autocorrelated than the Seasonal Naive model. still not white noise.
fit2010 |> accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victo… Other r… ETS(T… Trai… 0.0361 4.39 3.13 -0.440 5.17 0.583 0.626 0.316
fc2010 |> hilo()
## # A tsibble: 108 x 8 [1M]
## # Key: State, Industry, .model [1]
## State Industry .model Month
## <chr> <chr> <chr> <mth>
## 1 Victoria Other recreational goods retailing ETS(Turnover) 2010 Feb
## 2 Victoria Other recreational goods retailing ETS(Turnover) 2010 Mar
## 3 Victoria Other recreational goods retailing ETS(Turnover) 2010 Apr
## 4 Victoria Other recreational goods retailing ETS(Turnover) 2010 May
## 5 Victoria Other recreational goods retailing ETS(Turnover) 2010 Jun
## 6 Victoria Other recreational goods retailing ETS(Turnover) 2010 Jul
## 7 Victoria Other recreational goods retailing ETS(Turnover) 2010 Aug
## 8 Victoria Other recreational goods retailing ETS(Turnover) 2010 Sep
## 9 Victoria Other recreational goods retailing ETS(Turnover) 2010 Oct
## 10 Victoria Other recreational goods retailing ETS(Turnover) 2010 Nov
## # ℹ 98 more rows
## # ℹ 4 more variables: Turnover <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
fitsnaive |> accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victoria Other r… SNAIV… Trai… 2.85 7.01 5.37 5.76 9.02 1 1 0.647
fcsnaive |> hilo()
## # A tsibble: 108 x 8 [1M]
## # Key: State, Industry, .model [1]
## State Industry .model Month
## <chr> <chr> <chr> <mth>
## 1 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Feb
## 2 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Mar
## 3 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Apr
## 4 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 May
## 5 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Jun
## 6 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Jul
## 7 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Aug
## 8 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Sep
## 9 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Oct
## 10 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Nov
## # ℹ 98 more rows
## # ℹ 4 more variables: Turnover <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
the exponential smoothing model (M,A,M) has a smaller RMSE than the Seasonal Naive model.
9.For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
#select the best lambda for box cox
lambda <- myseries |>
features(Turnover, features = guerrero) |>
pull(lambda_guerrero)
#plot the box cox transformed data
myseries |>
autoplot(box_cox(Turnover, lambda)) +
labs(y = "Turnover",
title = latex2exp::TeX(paste0(
"Transformed Turnover with $\\lambda$ = ",
round(lambda,2))))
#store the box cox transformated values into the data frame
tmyseries <- myseries |> mutate(tturnover = box_cox(Turnover, lambda) )
#stl of box cox transformed series
tmyseries |>
model(
STL(tturnover ~ trend(window = 7) +
season(window = "periodic"),
robust = TRUE)) |>
components() |>
autoplot()
#ETS model of boxcox transformed data
fitbox <- tmyseries |> model(ETS(tturnover))
fcbox <- fitbox |> forecast(h = "9 years")
#visualize the
fcbox |>
autoplot(tmyseries) +
labs(title = "boxcox tmyseries")
fcbox |> hilo()
## # A tsibble: 108 x 8 [1M]
## # Key: State, Industry, .model [1]
## State Industry .model Month
## <chr> <chr> <chr> <mth>
## 1 Victoria Other recreational goods retailing ETS(tturnover) 2019 Jan
## 2 Victoria Other recreational goods retailing ETS(tturnover) 2019 Feb
## 3 Victoria Other recreational goods retailing ETS(tturnover) 2019 Mar
## 4 Victoria Other recreational goods retailing ETS(tturnover) 2019 Apr
## 5 Victoria Other recreational goods retailing ETS(tturnover) 2019 May
## 6 Victoria Other recreational goods retailing ETS(tturnover) 2019 Jun
## 7 Victoria Other recreational goods retailing ETS(tturnover) 2019 Jul
## 8 Victoria Other recreational goods retailing ETS(tturnover) 2019 Aug
## 9 Victoria Other recreational goods retailing ETS(tturnover) 2019 Sep
## 10 Victoria Other recreational goods retailing ETS(tturnover) 2019 Oct
## # ℹ 98 more rows
## # ℹ 4 more variables: tturnover <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>
fitbox |> accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victor… Other r… ETS(t… Trai… -0.00140 0.0243 0.0187 -0.0614 0.770 0.590 0.603
## # ℹ 1 more variable: ACF1 <dbl>
fitbox |> gg_tsresiduals(lag=108)
augment(fitbox) |>
features(.innov, ljung_box, lag = 108)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Victoria Other recreational goods retailing ETS(tturnover) 269. 1.11e-15
#trying to create a model that will work on the untransformed data
fitbox2 <- myseries |> model(ETS(box_cox(Turnover, lambda)))
#forcast of 9 years after the end of the data
fcbox2 <- fitbox2 |> forecast(h = "9 years")
#visualize the forecasted model
fcbox2 |>
autoplot(myseries) +
labs(title = "boxcox myseries")
#check the residual statistics of the model on the data set.
fitbox2 |> accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victo… Other r… ETS(b… Trai… -0.300 5.34 3.76 -0.678 5.52 0.673 0.741 0.270
fitbox2 |> gg_tsresiduals(lag=108)
augment(fitbox2) |>
features(.innov, ljung_box, lag = 108)
## # A tibble: 1 × 5
## State Industry .model lb_stat lb_pvalue
## <chr> <chr> <chr> <dbl> <dbl>
## 1 Victoria Other recreational goods retailing ETS(box_cox(Tur… 269. 1.11e-15
still a lot of autocorrelation even when the model was trained on the box cox transformed data. RMSE is 5.340485 which is lower than the Snaive but higher than the MAM untransformed model. judging by MAE and other values the MAM model on the untransformed values seems to be best.