Data 624 assignment 5

8.1, 8.5, 8.6, 8.7, 8.8, 8.9

#loaded relevant libraries
library(fpp3)

## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr

## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──

## ✔ tibble      3.2.1     ✔ tsibble     1.1.5
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.1
## ✔ lubridate   1.9.3     ✔ fable       0.4.0
## ✔ ggplot2     3.5.1

## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.5
## ✔ purrr   1.0.2     ✔ stringr 1.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readr)
library(latex2exp)
library(seasonal)

## 
## Attaching package: 'seasonal'
## 
## The following object is masked from 'package:tibble':
## 
##     view

##8.1) Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset a) Use the ETS() function to estimate the equivalent model for simple exponential smoothing. Find the optimal values of α and ℓ0, and generate forecasts for the next four months.

head(aus_livestock)

## # A tsibble: 6 x 4 [1M]
## # Key:       Animal, State [1]
##      Month Animal                     State                        Count
##      <mth> <fct>                      <fct>                        <dbl>
## 1 1976 Jul Bulls, bullocks and steers Australian Capital Territory  2300
## 2 1976 Aug Bulls, bullocks and steers Australian Capital Territory  2100
## 3 1976 Sep Bulls, bullocks and steers Australian Capital Territory  2100
## 4 1976 Oct Bulls, bullocks and steers Australian Capital Territory  1900
## 5 1976 Nov Bulls, bullocks and steers Australian Capital Territory  2100
## 6 1976 Dec Bulls, bullocks and steers Australian Capital Territory  1800

unique(aus_livestock$Animal)

## [1] Bulls, bullocks and steers Calves                    
## [3] Cattle (excl. calves)      Cows and heifers          
## [5] Lambs                      Pigs                      
## [7] Sheep                     
## 7 Levels: Bulls, bullocks and steers Calves ... Sheep

#filter out the data needed
aus_pigs <- aus_livestock |>
  filter(Animal == "Pigs", State == "Victoria") 
#view the data
aus_livestock |>
  filter(Animal == "Pigs", State == "Victoria") |> autoplot()

## Plot variable not specified, automatically selected `.vars = Count`

#build a model
fit <- aus_pigs |>
  model(ETS(Count))
report(fit)

## Series: Count 
## Model: ETS(A,N,A) 
##   Smoothing parameters:
##     alpha = 0.3579401 
##     gamma = 0.0001000139 
## 
##   Initial states:
##     l[0]     s[0]    s[-1]     s[-2]     s[-3]     s[-4]     s[-5]   s[-6]
##  95487.5 2066.235 6717.311 -2809.562 -1341.299 -7750.173 -8483.418 5610.89
##      s[-7]    s[-8]     s[-9]   s[-10]   s[-11]
##  -579.8107 1215.464 -2827.091 1739.465 6441.989
## 
##   sigma^2:  60742898
## 
##      AIC     AICc      BIC 
## 13545.38 13546.26 13610.24

#model selected A,N,A

alpha = 0.3579401 gamma = 0.0001000139 l0 95487.5

#forcast with the model for 4 months
fit |>
  forecast(h = 4) |>
  autoplot(aus_pigs)+
  labs(title="Pigs Victoria",
       y="Count")

Compute a 95% prediction interval for the first forecast using y±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R

#interval produced by R
  fit |>
  forecast(h = 4) |>
  hilo()

## # A tsibble: 4 x 8 [1M]
## # Key:       Animal, State, .model [1]
##   Animal State    .model        Month
##   <fct>  <fct>    <chr>         <mth>
## 1 Pigs   Victoria ETS(Count) 2019 Jan
## 2 Pigs   Victoria ETS(Count) 2019 Feb
## 3 Pigs   Victoria ETS(Count) 2019 Mar
## 4 Pigs   Victoria ETS(Count) 2019 Apr
## # ℹ 4 more variables: Count <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

#to pick out the 95% prediction interval
fit95 <-  fit |>
  forecast(h = 4) |>
  hilo()
#gather the residuals to compute the standard deviation
residinfo <- augment(fit)

class(residinfo)

## [1] "tbl_ts"     "tbl_df"     "tbl"        "data.frame"

#putting the residuals into residinfores
residinfores <- residinfo$.resid
#compute the standard deviation of the residuals
sd(residinfores)

## [1] 7702.275

#7702.275

print("s=7702.275")

## [1] "s=7702.275"

print("y+1.96s")

## [1] "y+1.96s"

print(84424.71 + 1.96 * 7702.275)

## [1] 99521.17

print("y-1.96s")

## [1] "y-1.96s"

print(84424.71 - 1.96 * 7702.275)

## [1] 69328.25

print(fit95$`95%`[1])

## <hilo[1]>
## [1] [69149.19, 99700.22]95

print("percent difference of y - 1.96s compared to r generated value")

## [1] "percent difference of y - 1.96s compared to r generated value"

((69328-69149.19)/69149.19 ) * 100

## [1] 0.2585858

print("percent difference of y + 1.96s compared to r generated value")

## [1] "percent difference of y + 1.96s compared to r generated value"

((99521.17-99700.22)/99700.22 ) * 100

## [1] -0.1795884

when compared to r intervals. my computed intervals differ by about .2% to .3%

##8.5

Data set global_economy contains the annual Exports from many countries. Select one country to analyse. a) Plot the Exports series and discuss the main features of the data.

unique(global_economy$Country)

##   [1] Afghanistan                                         
##   [2] Albania                                             
##   [3] Algeria                                             
##   [4] American Samoa                                      
##   [5] Andorra                                             
##   [6] Angola                                              
##   [7] Antigua and Barbuda                                 
##   [8] Arab World                                          
##   [9] Argentina                                           
##  [10] Armenia                                             
##  [11] Aruba                                               
##  [12] Australia                                           
##  [13] Austria                                             
##  [14] Azerbaijan                                          
##  [15] Bahamas, The                                        
##  [16] Bahrain                                             
##  [17] Bangladesh                                          
##  [18] Barbados                                            
##  [19] Belarus                                             
##  [20] Belgium                                             
##  [21] Belize                                              
##  [22] Benin                                               
##  [23] Bermuda                                             
##  [24] Bhutan                                              
##  [25] Bolivia                                             
##  [26] Bosnia and Herzegovina                              
##  [27] Botswana                                            
##  [28] Brazil                                              
##  [29] British Virgin Islands                              
##  [30] Brunei Darussalam                                   
##  [31] Bulgaria                                            
##  [32] Burkina Faso                                        
##  [33] Burundi                                             
##  [34] Cabo Verde                                          
##  [35] Cambodia                                            
##  [36] Cameroon                                            
##  [37] Canada                                              
##  [38] Caribbean small states                              
##  [39] Cayman Islands                                      
##  [40] Central African Republic                            
##  [41] Central Europe and the Baltics                      
##  [42] Chad                                                
##  [43] Channel Islands                                     
##  [44] Chile                                               
##  [45] China                                               
##  [46] Colombia                                            
##  [47] Comoros                                             
##  [48] Congo, Dem. Rep.                                    
##  [49] Congo, Rep.                                         
##  [50] Costa Rica                                          
##  [51] Cote d'Ivoire                                       
##  [52] Croatia                                             
##  [53] Cuba                                                
##  [54] Curacao                                             
##  [55] Cyprus                                              
##  [56] Czech Republic                                      
##  [57] Denmark                                             
##  [58] Djibouti                                            
##  [59] Dominica                                            
##  [60] Dominican Republic                                  
##  [61] Early-demographic dividend                          
##  [62] East Asia & Pacific                                 
##  [63] East Asia & Pacific (excluding high income)         
##  [64] East Asia & Pacific (IDA & IBRD countries)          
##  [65] Ecuador                                             
##  [66] Egypt, Arab Rep.                                    
##  [67] El Salvador                                         
##  [68] Equatorial Guinea                                   
##  [69] Eritrea                                             
##  [70] Estonia                                             
##  [71] Eswatini                                            
##  [72] Ethiopia                                            
##  [73] Euro area                                           
##  [74] Europe & Central Asia                               
##  [75] Europe & Central Asia (excluding high income)       
##  [76] Europe & Central Asia (IDA & IBRD countries)        
##  [77] European Union                                      
##  [78] Faroe Islands                                       
##  [79] Fiji                                                
##  [80] Finland                                             
##  [81] Fragile and conflict affected situations            
##  [82] France                                              
##  [83] French Polynesia                                    
##  [84] Gabon                                               
##  [85] Gambia, The                                         
##  [86] Georgia                                             
##  [87] Germany                                             
##  [88] Ghana                                               
##  [89] Gibraltar                                           
##  [90] Greece                                              
##  [91] Greenland                                           
##  [92] Grenada                                             
##  [93] Guam                                                
##  [94] Guatemala                                           
##  [95] Guinea                                              
##  [96] Guinea-Bissau                                       
##  [97] Guyana                                              
##  [98] Haiti                                               
##  [99] Heavily indebted poor countries (HIPC)              
## [100] High income                                         
## [101] Honduras                                            
## [102] Hong Kong SAR, China                                
## [103] Hungary                                             
## [104] IBRD only                                           
## [105] Iceland                                             
## [106] IDA & IBRD total                                    
## [107] IDA blend                                           
## [108] IDA only                                            
## [109] IDA total                                           
## [110] India                                               
## [111] Indonesia                                           
## [112] Iran, Islamic Rep.                                  
## [113] Iraq                                                
## [114] Ireland                                             
## [115] Isle of Man                                         
## [116] Israel                                              
## [117] Italy                                               
## [118] Jamaica                                             
## [119] Japan                                               
## [120] Jordan                                              
## [121] Kazakhstan                                          
## [122] Kenya                                               
## [123] Kiribati                                            
## [124] Korea, Dem. People's Rep.                           
## [125] Korea, Rep.                                         
## [126] Kosovo                                              
## [127] Kuwait                                              
## [128] Kyrgyz Republic                                     
## [129] Lao PDR                                             
## [130] Late-demographic dividend                           
## [131] Latin America & Caribbean                           
## [132] Latin America & Caribbean (excluding high income)   
## [133] Latin America & the Caribbean (IDA & IBRD countries)
## [134] Latvia                                              
## [135] Least developed countries: UN classification        
## [136] Lebanon                                             
## [137] Lesotho                                             
## [138] Liberia                                             
## [139] Libya                                               
## [140] Liechtenstein                                       
## [141] Lithuania                                           
## [142] Low & middle income                                 
## [143] Low income                                          
## [144] Lower middle income                                 
## [145] Luxembourg                                          
## [146] Macao SAR, China                                    
## [147] Macedonia, FYR                                      
## [148] Madagascar                                          
## [149] Malawi                                              
## [150] Malaysia                                            
## [151] Maldives                                            
## [152] Mali                                                
## [153] Malta                                               
## [154] Marshall Islands                                    
## [155] Mauritania                                          
## [156] Mauritius                                           
## [157] Mexico                                              
## [158] Micronesia, Fed. Sts.                               
## [159] Middle East & North Africa                          
## [160] Middle East & North Africa (excluding high income)  
## [161] Middle East & North Africa (IDA & IBRD countries)   
## [162] Middle income                                       
## [163] Moldova                                             
## [164] Monaco                                              
## [165] Mongolia                                            
## [166] Montenegro                                          
## [167] Morocco                                             
## [168] Mozambique                                          
## [169] Myanmar                                             
## [170] Namibia                                             
## [171] Nauru                                               
## [172] Nepal                                               
## [173] Netherlands                                         
## [174] New Caledonia                                       
## [175] New Zealand                                         
## [176] Nicaragua                                           
## [177] Niger                                               
## [178] Nigeria                                             
## [179] North America                                       
## [180] Northern Mariana Islands                            
## [181] Norway                                              
## [182] OECD members                                        
## [183] Oman                                                
## [184] Other small states                                  
## [185] Pacific island small states                         
## [186] Pakistan                                            
## [187] Palau                                               
## [188] Panama                                              
## [189] Papua New Guinea                                    
## [190] Paraguay                                            
## [191] Peru                                                
## [192] Philippines                                         
## [193] Poland                                              
## [194] Portugal                                            
## [195] Post-demographic dividend                           
## [196] Pre-demographic dividend                            
## [197] Puerto Rico                                         
## [198] Qatar                                               
## [199] Romania                                             
## [200] Russian Federation                                  
## [201] Rwanda                                              
## [202] Samoa                                               
## [203] San Marino                                          
## [204] Sao Tome and Principe                               
## [205] Saudi Arabia                                        
## [206] Senegal                                             
## [207] Serbia                                              
## [208] Seychelles                                          
## [209] Sierra Leone                                        
## [210] Singapore                                           
## [211] Sint Maarten (Dutch part)                           
## [212] Slovak Republic                                     
## [213] Slovenia                                            
## [214] Small states                                        
## [215] Solomon Islands                                     
## [216] Somalia                                             
## [217] South Africa                                        
## [218] South Asia                                          
## [219] South Asia (IDA & IBRD)                             
## [220] South Sudan                                         
## [221] Spain                                               
## [222] Sri Lanka                                           
## [223] St. Kitts and Nevis                                 
## [224] St. Lucia                                           
## [225] St. Martin (French part)                            
## [226] St. Vincent and the Grenadines                      
## [227] Sub-Saharan Africa                                  
## [228] Sub-Saharan Africa (excluding high income)          
## [229] Sub-Saharan Africa (IDA & IBRD countries)           
## [230] Sudan                                               
## [231] Suriname                                            
## [232] Sweden                                              
## [233] Switzerland                                         
## [234] Syrian Arab Republic                                
## [235] Tajikistan                                          
## [236] Tanzania                                            
## [237] Thailand                                            
## [238] Timor-Leste                                         
## [239] Togo                                                
## [240] Tonga                                               
## [241] Trinidad and Tobago                                 
## [242] Tunisia                                             
## [243] Turkey                                              
## [244] Turkmenistan                                        
## [245] Turks and Caicos Islands                            
## [246] Tuvalu                                              
## [247] Uganda                                              
## [248] Ukraine                                             
## [249] United Arab Emirates                                
## [250] United Kingdom                                      
## [251] United States                                       
## [252] Upper middle income                                 
## [253] Uruguay                                             
## [254] Uzbekistan                                          
## [255] Vanuatu                                             
## [256] Venezuela, RB                                       
## [257] Vietnam                                             
## [258] Virgin Islands (U.S.)                               
## [259] West Bank and Gaza                                  
## [260] World                                               
## [261] Yemen, Rep.                                         
## [262] Zambia                                              
## [263] Zimbabwe                                            
## 263 Levels: Afghanistan Albania Algeria American Samoa Andorra ... Zimbabwe

india_econ <- global_economy |> filter(Country == "India")
head(india_econ)

## # A tsibble: 6 x 9 [1Y]
## # Key:       Country [1]
##   Country Code   Year          GDP Growth   CPI Imports Exports Population
##   <fct>   <fct> <dbl>        <dbl>  <dbl> <dbl>   <dbl>   <dbl>      <dbl>
## 1 India   IND    1960 36535925031.  NA     2.53    6.91    4.51  449480608
## 2 India   IND    1961 38709096076.   3.72  2.57    6.02    4.35  458494963
## 3 India   IND    1962 41599070242.   2.93  2.66    6.10    4.21  467852537
## 4 India   IND    1963 47776000900.   5.99  2.74    5.97    4.33  477527970
## 5 India   IND    1964 55726873084.   7.45  3.11    5.75    3.76  487484535
## 6 India   IND    1965 58760424670.  -2.64  3.40    5.27    3.34  497702365

#view India's exports
india_econ |> autoplot(Exports)

#decompose the data to view features
dcmp <- india_econ |>
  model(stl = STL(Exports))
components(dcmp) |> autoplot()

The trend is the exports increase gradually with some bumps from 1960 to 2010. then it decreases after 2010.

Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.

fitann <- india_econ |>
  model(
    ANN = ETS(Exports ~ error("A") + trend("N") + season("N"))
 )

fitann |>
  forecast(h = 10) |>
  autoplot(india_econ)+
  labs(title="India's Exports",
       y="Exports")

Compute the RMSE values for the training data.

#fitann |> augment()

#accuracy can provide the RMSE for a model using the residuals
fitann |> accuracy()

## # A tibble: 1 × 11
##   Country .model .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <fct>   <chr>  <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 India   ANN    Training 0.251  1.19 0.798  2.05  7.25 0.983 0.991 -0.0331

Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.

fitaan <- india_econ |>
  model(
    AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
 )
fitaan |>
  forecast(h = 10) |>
  autoplot(india_econ)+
  labs(title="India's Exports",
       y="Exports")

fitaan |> accuracy()

## # A tibble: 1 × 11
##   Country .model .type         ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <fct>   <chr>  <chr>      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 India   AAN    Training -0.0501  1.18 0.736 0.239  6.98 0.906 0.978 -0.0294

the AAN model follow the most recent trend of the decrease in exports from 2010 onwards. it produces a slightly lower RMSE than the ANN model. in one way the ANN model is more optimistic in the amount of exports while the AAN is more pesimistic. They both have very wide 95% intervals.

d.Compare the forecasts from both methods. Which do you think is best?

A more conservative approach might be the AAN model since it expects exports to continue to lessen instead of improving. presenting this prediction may induce a policy change. The ANN model is basically naive showing little to no change from the last value. if only judging by the RMSE then the AAN model is slightly better.

f.Calculate a 95% prediction interval for the first forecast for each model, using the RMSE values and assuming normal errors. Compare your intervals with those produced using R.

following the calculation from question 1

#interval produced by R
  fitann |>
  forecast(h = 10) |>
  hilo()

## # A tsibble: 10 x 7 [1Y]
## # Key:       Country, .model [1]
##    Country .model  Year
##    <fct>   <chr>  <dbl>
##  1 India   ANN     2018
##  2 India   ANN     2019
##  3 India   ANN     2020
##  4 India   ANN     2021
##  5 India   ANN     2022
##  6 India   ANN     2023
##  7 India   ANN     2024
##  8 India   ANN     2025
##  9 India   ANN     2026
## 10 India   ANN     2027
## # ℹ 4 more variables: Exports <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

#to pick out the 95% prediction interval
fitann2 <-  fitann |>
  forecast(h = 10) |>
  hilo()
accuracyann <- fitaan |> accuracy()

print(fitann2$`95%`[1])

## <hilo[1]>
## [1] [16.66831, 21.42248]95

#computed from +/- 1.96 * RMSE
print("computed 95% interval for A,N,N model")

## [1] "computed 95% interval for A,N,N model"

19.04539 + 1.96*accuracyann$RMSE[1]

## [1] 21.35036

19.04539 - 1.96*accuracyann$RMSE[1]

## [1] 16.74042

#interval produced by R
  fitaan |>
  forecast(h = 10) |>
  hilo()

## # A tsibble: 10 x 7 [1Y]
## # Key:       Country, .model [1]
##    Country .model  Year
##    <fct>   <chr>  <dbl>
##  1 India   AAN     2018
##  2 India   AAN     2019
##  3 India   AAN     2020
##  4 India   AAN     2021
##  5 India   AAN     2022
##  6 India   AAN     2023
##  7 India   AAN     2024
##  8 India   AAN     2025
##  9 India   AAN     2026
## 10 India   AAN     2027
## # ℹ 4 more variables: Exports <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

#to pick out the 95% prediction interval
fitaan2 <-  fitaan |>
  forecast(h = 10) |>
  hilo()

print(fitaan2$`95%`[1])

## <hilo[1]>
## [1] [15.79928, 20.57683]95

accuracyaan <- fitaan |> accuracy()
print("computed 95% interval for A,A,N model")

## [1] "computed 95% interval for A,A,N model"

18.18805 + 1.96*accuracyaan$RMSE[1]

## [1] 20.49302

18.18805 - 1.96*accuracyaan$RMSE[1]

## [1] 15.88308

results of the 95% intervals calculated from RMSE differ by at most 0.1/15*100 or 0.67%

Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.

[Hint: use a relatively large value of h when forecasting, so you can clearly see the differences between the various options when plotting the forecasts.]

#season can not be applied to this data either A or M
chinagdp <- global_economy |> filter(Country == "China") |> select(Year, GDP)

chinaaan <- chinagdp |>
  model(
    ANN = ETS(GDP ~ error("A") + trend("A") + season("N"))
 )
chinaaan |>
  forecast(h = 100) |>
  autoplot(chinagdp)+
  labs(title="A,A,N China's GDP",
       y="GDP")

chinaaadn <- chinagdp |>
  model(
    AAN = ETS(GDP ~ error("A") + trend("Ad") + season("N"))
 )
chinaaadn |>
  forecast(h = 30) |>
  autoplot(chinagdp)+
  labs(title="A,Ad,N China's GDP",
       y="GDP")

chinamadn <- chinagdp |>
  model(
    MAN = ETS(GDP ~ error("M") + trend("Ad") + season("N"))
 )
chinamadn |>
  forecast(h = 30) |>
  autoplot(chinagdp)+
  labs(title="M,Ad,N China's GDP",
       y="GDP")

chinammn <- chinagdp |>
  model(
    MMN = ETS(GDP ~ error("M") + trend("M") + season("N"))
 )
chinammn |>
  forecast(h = 10) |>
  autoplot(chinagdp)+
  labs(title=" M,M,N China's GDP",
       y="GDP")

chinaamn <- chinagdp |>
  model(
    AMN = ETS(GDP ~ error("A") + trend("M") + season("N"))
 )
chinaamn |>
  forecast(h = 10) |>
  autoplot(chinagdp)+
  labs(title=" A,M,N China's GDP",
       y="GDP")

chinaann <- chinagdp |>
  model(
    ANN = ETS(GDP ~ error("A") + trend("N") + season("N"))
 )
chinaann |>
  forecast(h = 100) |>
  autoplot(chinagdp)+
  labs(title=" A,N,N China's GDP",
       y="GDP")

chinamnn <- chinagdp |>
  model(
    MNN = ETS(GDP ~ error("M") + trend("N") + season("N"))
 )
chinamnn |>
  forecast(h = 100) |>
  autoplot(chinagdp)+
  labs(title=" M,N,N China's GDP",
       y="GDP")

chinamadn |>forecast(h = 30) |>  hilo()

## # A tsibble: 30 x 6 [1Y]
## # Key:       .model [1]
##    .model  Year
##    <chr>  <dbl>
##  1 MAN     2018
##  2 MAN     2019
##  3 MAN     2020
##  4 MAN     2021
##  5 MAN     2022
##  6 MAN     2023
##  7 MAN     2024
##  8 MAN     2025
##  9 MAN     2026
## 10 MAN     2027
## # ℹ 20 more rows
## # ℹ 4 more variables: GDP <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

chinaaadn |>forecast(h = 30) |>  hilo()

## # A tsibble: 30 x 6 [1Y]
## # Key:       .model [1]
##    .model  Year
##    <chr>  <dbl>
##  1 AAN     2018
##  2 AAN     2019
##  3 AAN     2020
##  4 AAN     2021
##  5 AAN     2022
##  6 AAN     2023
##  7 AAN     2024
##  8 AAN     2025
##  9 AAN     2026
## 10 AAN     2027
## # ℹ 20 more rows
## # ℹ 4 more variables: GDP <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

an A,N,N is just a naive model. An A,A,N follows to most recent upward trend. dampening the trend in A,Ad,N at first follows the A,A,N trend but then the slope decreases or “dampens” for the longer forecasts. An M,N,N looks like naive or A,N,N but it has a wider 95% interval than A,N,N. A,Ad,N predicts a larger GDP than M,Ad,N however the predictions are very similar. M,M,N slope increases more rapidly than A,M,N. this may be due to M error emphasizing the most recent trend increase. MMN increases more rapidly and AMN

7.Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?

#generating the ETS model
gasdf <- aus_production |> select(Quarter, Gas)
fitgas <- gasdf |>
  model(ETS(Gas))
report(fitgas)

## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389

#decompose the model
components(fitgas) |>
  autoplot() +
  labs(title = "ETS components")

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).

#using the model to forcast.
fitgas |>
  forecast(h = 24) |>
  autoplot(gasdf)+
  labs(title="ETS model MAM gas",
       y="gas")

#errors in the model, residuals
glance(fitgas)

## # A tibble: 1 × 9
##   .model    sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE    MAE
##   <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 ETS(Gas) 0.00324   -831. 1681. 1682. 1711.  21.1  32.2 0.0413

report(fitgas)

## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389

fitgas |> accuracy()

## # A tibble: 1 × 10
##   .model   .type        ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>    <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 ETS(Gas) Training -0.115  4.60  3.02 0.199  4.08 0.542 0.606 -0.0131

#
fitgas  |> gg_tsresiduals(lag=24)

fitgas |>forecast(h = 24) |>  hilo()

## # A tsibble: 24 x 6 [1Q]
## # Key:       .model [1]
##    .model   Quarter
##    <chr>      <qtr>
##  1 ETS(Gas) 2010 Q3
##  2 ETS(Gas) 2010 Q4
##  3 ETS(Gas) 2011 Q1
##  4 ETS(Gas) 2011 Q2
##  5 ETS(Gas) 2011 Q3
##  6 ETS(Gas) 2011 Q4
##  7 ETS(Gas) 2012 Q1
##  8 ETS(Gas) 2012 Q2
##  9 ETS(Gas) 2012 Q3
## 10 ETS(Gas) 2012 Q4
## # ℹ 14 more rows
## # ℹ 4 more variables: Gas <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

gasmadm <- gasdf |>
  model(
    MAM = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
 )
gasmadm |>
  forecast(h = 24) |>
  autoplot(gasdf)+
  labs(title=" M,ad,M Gas",
       y="Gas")

glance(gasmadm)

## # A tibble: 1 × 9
##   .model  sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE    MAE
##   <chr>    <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 MAM    0.00329   -832. 1684. 1685. 1718.  21.1  32.0 0.0417

report(gasmadm)

## Series: Gas 
## Model: ETS(M,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.6489044 
##     beta  = 0.1551275 
##     gamma = 0.09369372 
##     phi   = 0.98 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]   s[-2]     s[-3]
##  5.858941 0.09944006 0.9281912 1.177903 1.07678 0.8171255
## 
##   sigma^2:  0.0033
## 
##      AIC     AICc      BIC 
## 1684.028 1685.091 1717.873

gasmadm |> accuracy()

## # A tibble: 1 × 10
##   .model .type          ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>  <chr>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 MAM    Training -0.00439  4.59  3.03 0.326  4.10 0.544 0.606 -0.0217

gasmadm  |> gg_tsresiduals(lag=24)

gasmadm |>forecast(h = 24) |>  hilo()

## # A tsibble: 24 x 6 [1Q]
## # Key:       .model [1]
##    .model Quarter
##    <chr>    <qtr>
##  1 MAM    2010 Q3
##  2 MAM    2010 Q4
##  3 MAM    2011 Q1
##  4 MAM    2011 Q2
##  5 MAM    2011 Q3
##  6 MAM    2011 Q4
##  7 MAM    2012 Q1
##  8 MAM    2012 Q2
##  9 MAM    2012 Q3
## 10 MAM    2012 Q4
## # ℹ 14 more rows
## # ℹ 4 more variables: Gas <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

#compare to A seasonality 
gasmaa <- gasdf |>
  model(
    MAA = ETS(Gas ~ error("M") + trend("A") + season("A"))
 )
gasmaa |>
  forecast(h = 24) |>
  autoplot(gasdf)+
  labs(title=" M,A,A Gas",
       y="Gas")

glance(gasmaa)

## # A tibble: 1 × 9
##   .model sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE    MAE
##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 MAA    0.0430  -1120. 2257. 2258. 2288.  40.9  64.0 0.0844

report(gasmaa)

## Series: Gas 
## Model: ETS(M,A,A) 
##   Smoothing parameters:
##     alpha = 0.291845 
##     beta  = 0.2918449 
##     gamma = 0.4904944 
## 
##   Initial states:
##      l[0]      b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  12.47885 -3.669884 -9.763467 23.34845 6.323571 -19.90855
## 
##   sigma^2:  0.043
## 
##      AIC     AICc      BIC 
## 2257.099 2257.964 2287.559

gasmaa |> accuracy()

## # A tibble: 1 × 10
##   .model .type        ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MAA    Training 0.0508  6.39  4.03  2.02  14.5 0.723 0.843 0.226

gasmaa |>forecast(h = 24) |>  hilo()

## # A tsibble: 24 x 6 [1Q]
## # Key:       .model [1]
##    .model Quarter
##    <chr>    <qtr>
##  1 MAA    2010 Q3
##  2 MAA    2010 Q4
##  3 MAA    2011 Q1
##  4 MAA    2011 Q2
##  5 MAA    2011 Q3
##  6 MAA    2011 Q4
##  7 MAA    2012 Q1
##  8 MAA    2012 Q2
##  9 MAA    2012 Q3
## 10 MAA    2012 Q4
## # ℹ 14 more rows
## # ℹ 4 more variables: Gas <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

#N seasonality
gasman <- gasdf |>
  model(
    MAN = ETS(Gas ~ error("M") + trend("A") + season("N"))
 )
gasman |>
  forecast(h = 24) |>
  autoplot(gasdf)+
  labs(title=" M,A,N Gas",
       y="Gas")

glance(gasman)

## # A tibble: 1 × 9
##   .model sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE   MAE
##   <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 MAN    0.0266  -1064. 2138. 2138. 2154.  268.  249. 0.136

report(gasman)

## Series: Gas 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.111075 
##     beta  = 0.1018672 
## 
##   Initial states:
##      l[0]      b[0]
##  5.761939 0.0725193
## 
##   sigma^2:  0.0266
## 
##      AIC     AICc      BIC 
## 2137.521 2137.804 2154.443

gasman |> accuracy()

## # A tibble: 1 × 10
##   .model .type         ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>  <chr>      <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 MAN    Training -0.0787  16.4  11.9 -1.32  13.5  2.14  2.16 0.0888

gasman |>forecast(h = 24) |>  hilo()

## # A tsibble: 24 x 6 [1Q]
## # Key:       .model [1]
##    .model Quarter
##    <chr>    <qtr>
##  1 MAN    2010 Q3
##  2 MAN    2010 Q4
##  3 MAN    2011 Q1
##  4 MAN    2011 Q2
##  5 MAN    2011 Q3
##  6 MAN    2011 Q4
##  7 MAN    2012 Q1
##  8 MAN    2012 Q2
##  9 MAN    2012 Q3
## 10 MAN    2012 Q4
## # ℹ 14 more rows
## # ℹ 4 more variables: Gas <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

Akaike’s Information Criterion (AIC) Mean Absolute error (MAE) MAM: AIC: 1680.929, RMSE: 4.595113, MAE: 0.04134491 MAdM: AIC: 1684.091, RMSE: 4.59184, MAE: 0.04165944 MAA: AIC: 2257.099, RMSE: 6.391643, MAE:0.08440083 MAN: AIC: 2137.521, RMSE: 16.35735, MAE: 0.1356131

Damping the trend reduces the RMSE slightly however it also raises the AIC. preference goes to the lower AIC for automated model selection. Also the mean absolute error for the undamped M,A,M model is less. However the models(M,A,M and M,Ad,M) are roughly equivalent.

the preference for Multiplicative seasonality is indicative of a seasonality that scales with the data. for an additive the seasonality has little variation despite the changing trend in the data. removing seasonality from the model drastically increases the MAE, AIC, and RMSE. changing it to additive also yields worse residuals however it is better than a model with none for seasonality.

Recall your retail time series data

#info from 2.10 number 7
set.seed(987654321)
myseries <- aus_retail |>
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
#breaking down the data 
myseries |> autoplot()

## Plot variable not specified, automatically selected `.vars = Turnover`

myseries |> gg_season()

## Plot variable not specified, automatically selected `y = Turnover`

myseries |> gg_subseries()

## Plot variable not specified, automatically selected `y = Turnover`

myseries |> gg_lag()

## Plot variable not specified, automatically selected `y = Turnover`

Why is multiplicative seasonality necessary for this series?

the seasonality scales with the level or trend of the data. at the beginning when the data has low values its variation is also low. as the values increase the difference between the min and max for a year also increase. with this change in seasonality a multiplicative seasonality is necessary for this series. if the seasonality differences were consistant throughout then an additive seasonality would be more appropriate.

Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.

#multiplicative holt winters
 fitmam <- myseries |> model(
    
    multiplicative = ETS(Turnover ~ error("M") + trend("A") +
                                                season("M"))
  )
#trend damped multiplicative holt winters
fitmadm <- myseries |> model( MAdM = 
                                ETS(Turnover ~ error("M") + trend("Ad") + season("M"))
                              )
# models trained then forcast
fcmam <- fitmam |> forecast(h = "10 years")
fcmadm  <- fitmadm |> forecast(h = "10 years")
#plot data with forcast
fcmam |>
  autoplot(myseries) +
  labs(title="retail M,A,M",
       y="turnover")

fcmadm |>
  autoplot(myseries) +
  labs(title="retail M,Ad,M",
       y="turnover")

Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?

#statistics on MAM and MAdM models
fitmam |> accuracy()

## # A tibble: 1 × 12
##   State Industry .model .type      ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr> <chr>    <chr>  <chr>   <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Vict… Other r… multi… Trai… -0.0589  5.13  3.67 -0.510  5.49 0.658 0.711 0.309

fitmam |> report()

## Series: Turnover 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.2977895 
##     beta  = 0.0001025242 
##     gamma = 0.2942524 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]    s[-5]
##  16.23251 0.1903941 0.9590884 0.9116755 0.8968424 1.306677 1.070696 1.029606
##      s[-6]     s[-7]    s[-8]     s[-9]   s[-10]    s[-11]
##  0.9450492 0.9897274 0.962153 0.9419827 1.003206 0.9832971
## 
##   sigma^2:  0.0054
## 
##      AIC     AICc      BIC 
## 3971.986 3973.433 4041.500

fcmam |> hilo()

## # A tsibble: 120 x 8 [1M]
## # Key:       State, Industry, .model [1]
##    State    Industry                           .model            Month
##    <chr>    <chr>                              <chr>             <mth>
##  1 Victoria Other recreational goods retailing multiplicative 2019 Jan
##  2 Victoria Other recreational goods retailing multiplicative 2019 Feb
##  3 Victoria Other recreational goods retailing multiplicative 2019 Mar
##  4 Victoria Other recreational goods retailing multiplicative 2019 Apr
##  5 Victoria Other recreational goods retailing multiplicative 2019 May
##  6 Victoria Other recreational goods retailing multiplicative 2019 Jun
##  7 Victoria Other recreational goods retailing multiplicative 2019 Jul
##  8 Victoria Other recreational goods retailing multiplicative 2019 Aug
##  9 Victoria Other recreational goods retailing multiplicative 2019 Sep
## 10 Victoria Other recreational goods retailing multiplicative 2019 Oct
## # ℹ 110 more rows
## # ℹ 4 more variables: Turnover <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

fitmadm |> accuracy()

## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victoria Other r… MAdM   Trai… 0.344  5.16  3.69 0.232  5.49 0.660 0.716 0.306

fitmadm |> report()

## Series: Turnover 
## Model: ETS(M,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.2994731 
##     beta  = 0.003000567 
##     gamma = 0.3123052 
##     phi   = 0.9799883 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]   s[-5]
##  15.73264 0.1849834 0.9487417 0.9006466 0.9090399 1.320031 1.059082 1.01368
##      s[-6]     s[-7]     s[-8]     s[-9]   s[-10]    s[-11]
##  0.9311945 0.9850626 0.9719544 0.9556592 1.039885 0.9650238
## 
##   sigma^2:  0.0056
## 
##      AIC     AICc      BIC 
## 3983.180 3984.801 4056.783

fcmadm |> hilo()

## # A tsibble: 120 x 8 [1M]
## # Key:       State, Industry, .model [1]
##    State    Industry                           .model    Month
##    <chr>    <chr>                              <chr>     <mth>
##  1 Victoria Other recreational goods retailing MAdM   2019 Jan
##  2 Victoria Other recreational goods retailing MAdM   2019 Feb
##  3 Victoria Other recreational goods retailing MAdM   2019 Mar
##  4 Victoria Other recreational goods retailing MAdM   2019 Apr
##  5 Victoria Other recreational goods retailing MAdM   2019 May
##  6 Victoria Other recreational goods retailing MAdM   2019 Jun
##  7 Victoria Other recreational goods retailing MAdM   2019 Jul
##  8 Victoria Other recreational goods retailing MAdM   2019 Aug
##  9 Victoria Other recreational goods retailing MAdM   2019 Sep
## 10 Victoria Other recreational goods retailing MAdM   2019 Oct
## # ℹ 110 more rows
## # ℹ 4 more variables: Turnover <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

M,A,M RMSE: 5.125605 M,Ad,M RMSE: 5.159722

The RMSE is very similar between the two models. despite the M,A,M having a slightly lower RMSE I prefer the M,Ad, M model because it follows the later downward trend better.

Check that the residuals from the best method look like white noise.

#residuals of the MAM and MAdM models
fitmam  |> gg_tsresiduals(lag=20)

fitmadm |>  gg_tsresiduals(lag=20)

#checking if MAM is the best model
fitauto <- myseries |> model(ETS(Turnover))
fitauto |> report()

## Series: Turnover 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.2977895 
##     beta  = 0.0001025242 
##     gamma = 0.2942524 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]    s[-5]
##  16.23251 0.1903941 0.9590884 0.9116755 0.8968424 1.306677 1.070696 1.029606
##      s[-6]     s[-7]    s[-8]     s[-9]   s[-10]    s[-11]
##  0.9450492 0.9897274 0.962153 0.9419827 1.003206 0.9832971
## 
##   sigma^2:  0.0054
## 
##      AIC     AICc      BIC 
## 3971.986 3973.433 4041.500

fitauto |> gg_tsresiduals(lag=20)

augment(fitauto) |>
  features(.innov, ljung_box, lag = 20)

## # A tibble: 1 × 5
##   State    Industry                           .model        lb_stat lb_pvalue
##   <chr>    <chr>                              <chr>           <dbl>     <dbl>
## 1 Victoria Other recreational goods retailing ETS(Turnover)    123.  1.11e-16

automated ETS model selects the M,A,M model.

residuals look right skewed. there are several residuals that pass the acf threshold. the autocorrelation is not close to zero for several lags. Leung box p value is significantly less than 0.05 so there is autocorrelation.

Now find the test set RMSE, while training the model to the end of 2010. Can you beat the seasonal naïve approach from Exercise 7 in Section 5.11?

train <- myseries |>
  filter_index("1982" ~ "2010")

## Warning: There were 2 warnings in `filter()`.
## The first warning was:
## ℹ In argument: `time_in(Month, ...)`.
## Caused by warning:
## ! `yearmonth()` may yield unexpected results.
## ℹ Please use arg `format` to supply formats.
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

fit2010 <- train |> model(ETS(Turnover))

fc2010 <- fit2010 |> forecast(h= "9 years")
fc2010 |>
  autoplot(myseries) +
  labs(title="2010 train",
       y="turnover")

fitsnaive <- train |>
  model(SNAIVE(Turnover))
fcsnaive <- fitsnaive |> forecast(h= "9 years")

fcsnaive |>
  autoplot(myseries) +
  labs(title="Snaive",
       y="turnover")

fitsnaive |> report()

## Series: Turnover 
## Model: SNAIVE 
## 
## sigma^2: 41.1598

fitsnaive |> gg_tsresiduals(lag=108)

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).

augment(fitsnaive) |>
  features(.innov, ljung_box, lag = 108)

## # A tibble: 1 × 5
##   State    Industry                           .model           lb_stat lb_pvalue
##   <chr>    <chr>                              <chr>              <dbl>     <dbl>
## 1 Victoria Other recreational goods retailing SNAIVE(Turnover)    803.         0

fit2010 |> report()

## Series: Turnover 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.2878128 
##     beta  = 0.000100698 
##     gamma = 0.289654 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]    s[-5]
##  16.30871 0.2252655 0.9588057 0.8953244 0.9139321 1.337444 1.050725 1.004981
##      s[-6]    s[-7]     s[-8]    s[-9]   s[-10]    s[-11]
##  0.9867994 0.978916 0.9678668 0.970458 1.010486 0.9242624
## 
##   sigma^2:  0.0048
## 
##      AIC     AICc      BIC 
## 2805.139 2807.076 2869.928

fit2010 |> gg_tsresiduals(lag=108)

augment(fit2010) |>
  features(.innov, ljung_box, lag = 108)

## # A tibble: 1 × 5
##   State    Industry                           .model        lb_stat lb_pvalue
##   <chr>    <chr>                              <chr>           <dbl>     <dbl>
## 1 Victoria Other recreational goods retailing ETS(Turnover)    250.  2.80e-13

well according to the p value from the ljung_box the exponential smoothing model is less autocorrelated than the Seasonal Naive model. still not white noise.

fit2010 |> accuracy()

## # A tibble: 1 × 12
##   State  Industry .model .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>    <chr>  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victo… Other r… ETS(T… Trai… 0.0361  4.39  3.13 -0.440  5.17 0.583 0.626 0.316

fc2010 |> hilo()

## # A tsibble: 108 x 8 [1M]
## # Key:       State, Industry, .model [1]
##    State    Industry                           .model           Month
##    <chr>    <chr>                              <chr>            <mth>
##  1 Victoria Other recreational goods retailing ETS(Turnover) 2010 Feb
##  2 Victoria Other recreational goods retailing ETS(Turnover) 2010 Mar
##  3 Victoria Other recreational goods retailing ETS(Turnover) 2010 Apr
##  4 Victoria Other recreational goods retailing ETS(Turnover) 2010 May
##  5 Victoria Other recreational goods retailing ETS(Turnover) 2010 Jun
##  6 Victoria Other recreational goods retailing ETS(Turnover) 2010 Jul
##  7 Victoria Other recreational goods retailing ETS(Turnover) 2010 Aug
##  8 Victoria Other recreational goods retailing ETS(Turnover) 2010 Sep
##  9 Victoria Other recreational goods retailing ETS(Turnover) 2010 Oct
## 10 Victoria Other recreational goods retailing ETS(Turnover) 2010 Nov
## # ℹ 98 more rows
## # ℹ 4 more variables: Turnover <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

fitsnaive |> accuracy()

## # A tibble: 1 × 12
##   State    Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>    <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victoria Other r… SNAIV… Trai…  2.85  7.01  5.37  5.76  9.02     1     1 0.647

fcsnaive |> hilo()

## # A tsibble: 108 x 8 [1M]
## # Key:       State, Industry, .model [1]
##    State    Industry                           .model              Month
##    <chr>    <chr>                              <chr>               <mth>
##  1 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Feb
##  2 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Mar
##  3 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Apr
##  4 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 May
##  5 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Jun
##  6 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Jul
##  7 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Aug
##  8 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Sep
##  9 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Oct
## 10 Victoria Other recreational goods retailing SNAIVE(Turnover) 2010 Nov
## # ℹ 98 more rows
## # ℹ 4 more variables: Turnover <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

the exponential smoothing model (M,A,M) has a smaller RMSE than the Seasonal Naive model.

9.For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?

#select the best lambda for box cox
lambda <- myseries |>
  features(Turnover, features = guerrero) |>
  pull(lambda_guerrero)
#plot the box cox transformed data
myseries |>
  autoplot(box_cox(Turnover, lambda)) +
  labs(y = "Turnover",
       title = latex2exp::TeX(paste0(
         "Transformed Turnover with $\\lambda$ = ",
         round(lambda,2))))

#store the box cox transformated values into the data frame
tmyseries <- myseries |> mutate(tturnover = box_cox(Turnover, lambda) )
#stl of box cox transformed series
tmyseries |>
  model(
    STL(tturnover ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) |>
  components() |>
  autoplot()

#ETS model of boxcox transformed data
fitbox <- tmyseries |> model(ETS(tturnover))
fcbox <- fitbox |> forecast(h = "9 years")
#visualize the 
fcbox |> 
autoplot(tmyseries) +
  labs(title = "boxcox tmyseries")

fcbox |> hilo()

## # A tsibble: 108 x 8 [1M]
## # Key:       State, Industry, .model [1]
##    State    Industry                           .model            Month
##    <chr>    <chr>                              <chr>             <mth>
##  1 Victoria Other recreational goods retailing ETS(tturnover) 2019 Jan
##  2 Victoria Other recreational goods retailing ETS(tturnover) 2019 Feb
##  3 Victoria Other recreational goods retailing ETS(tturnover) 2019 Mar
##  4 Victoria Other recreational goods retailing ETS(tturnover) 2019 Apr
##  5 Victoria Other recreational goods retailing ETS(tturnover) 2019 May
##  6 Victoria Other recreational goods retailing ETS(tturnover) 2019 Jun
##  7 Victoria Other recreational goods retailing ETS(tturnover) 2019 Jul
##  8 Victoria Other recreational goods retailing ETS(tturnover) 2019 Aug
##  9 Victoria Other recreational goods retailing ETS(tturnover) 2019 Sep
## 10 Victoria Other recreational goods retailing ETS(tturnover) 2019 Oct
## # ℹ 98 more rows
## # ℹ 4 more variables: tturnover <dist>, .mean <dbl>, `80%` <hilo>, `95%` <hilo>

fitbox |> accuracy()

## # A tibble: 1 × 12
##   State   Industry .model .type       ME   RMSE    MAE     MPE  MAPE  MASE RMSSE
##   <chr>   <chr>    <chr>  <chr>    <dbl>  <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl>
## 1 Victor… Other r… ETS(t… Trai… -0.00140 0.0243 0.0187 -0.0614 0.770 0.590 0.603
## # ℹ 1 more variable: ACF1 <dbl>

fitbox |> gg_tsresiduals(lag=108)

augment(fitbox) |>
  features(.innov, ljung_box, lag = 108)

## # A tibble: 1 × 5
##   State    Industry                           .model         lb_stat lb_pvalue
##   <chr>    <chr>                              <chr>            <dbl>     <dbl>
## 1 Victoria Other recreational goods retailing ETS(tturnover)    269.  1.11e-15

#trying to create a model that will work on the untransformed data
fitbox2 <- myseries |> model(ETS(box_cox(Turnover, lambda)))
#forcast of 9 years after the end of the data
fcbox2 <- fitbox2 |> forecast(h = "9 years")
#visualize the forecasted model
fcbox2 |> 
autoplot(myseries) +
  labs(title = "boxcox myseries")

#check the residual statistics of the model on the data set.
fitbox2 |> accuracy()

## # A tibble: 1 × 12
##   State  Industry .model .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>  <chr>    <chr>  <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victo… Other r… ETS(b… Trai… -0.300  5.34  3.76 -0.678  5.52 0.673 0.741 0.270

fitbox2 |> gg_tsresiduals(lag=108)

augment(fitbox2) |>
  features(.innov, ljung_box, lag = 108)

## # A tibble: 1 × 5
##   State    Industry                           .model           lb_stat lb_pvalue
##   <chr>    <chr>                              <chr>              <dbl>     <dbl>
## 1 Victoria Other recreational goods retailing ETS(box_cox(Tur…    269.  1.11e-15

still a lot of autocorrelation even when the model was trained on the box cox transformed data. RMSE is 5.340485 which is lower than the Snaive but higher than the MAM untransformed model. judging by MAE and other values the MAM model on the untransformed values seems to be best.

Data 624 assignment 5

Keith DeNivo

2024-11-21