HW 5

Questions

Exercise 1

pig <- aus_livestock %>% 
  filter(Animal=="Pigs") %>% 
  filter(State=="Victoria")
view(pig)
autoplot(pig)

## Plot variable not specified, automatically selected `.vars = Count`

pigcurrent <- pig %>% 
  slice(451:558)
pig %>%
  model(
    classical_decomposition(Count, type = "additive")
  ) %>%
  components() %>%
  autoplot()

## Warning: Removed 6 row(s) containing missing values (geom_path).

fit <- pig %>%
  model(ETS(Count ~ error("A") + trend("N") + season("N")))
report(fit)

## Series: Count 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.3221247 
## 
##   Initial states:
##      l[0]
##  100646.6
## 
##   sigma^2:  87480760
## 
##      AIC     AICc      BIC 
## 13737.10 13737.14 13750.07

fc <- fit %>% 
  forecast(h=4)
fc

## # A fable: 4 x 6 [1M]
## # Key:     Animal, State, .model [1]
##   Animal State    .model                          Month             Count  .mean
##   <fct>  <fct>    <chr>                           <mth>            <dist>  <dbl>
## 1 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Jan N(95187, 8.7e+07) 95187.
## 2 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Feb N(95187, 9.7e+07) 95187.
## 3 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Mar N(95187, 1.1e+08) 95187.
## 4 Pigs   Victoria "ETS(Count ~ error(\"A\") +… 2019 Apr N(95187, 1.1e+08) 95187.

fc %>%
  autoplot(pigcurrent) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit))

#calculating prediction interval for h=4:
resid <- augment(fit)
sd <- sd(resid$.resid)
#sd= 9344.665
#y estimate= 95.186.56
y <- fc$.mean[4]
#lower
lower <- y-(1.96*sd)
#upper
upper <- y+1.96*sd
lower

## [1] 76871.01

upper

## [1] 113502.1

#r produced interval
hilo(fc) %>% 
  select("95%")

## # A tsibble: 4 x 2 [1M]
##                    `95%`    Month
##                   <hilo>    <mth>
## 1 [76854.79, 113518.3]95 2019 Jan
## 2 [75927.17, 114445.9]95 2019 Feb
## 3 [75042.22, 115330.9]95 2019 Mar
## 4 [74194.54, 116178.6]95 2019 Apr

#these are really close to the ones that I calculated by hand. This is probably a good thing.

Exercise 2

china <- global_economy %>% 
  filter(Country== "China") %>% 
  select("Year", "Exports")
autoplot(china)
#data, which starts at 1960, shows a clear upward trend. this is due to China's aggressive policies pushing towards economic growth. The trend begins in 1970 and reaches its peak at around 2005. It looks like a decreasing trend begins at around the great recession, and it continues to decrease. 
fit <- china %>%
  model(
    ANN = ETS(Exports~ error("A") + trend("N") + season("N"))
  )
fc <- fit %>% forecast(h = 10)
fc
fc %>%
  autoplot(china) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit))
accuracy(fit)
fit2 <- china %>% 
    model(
    AAN = ETS(Exports~ error("A") + trend("A") + season("N"))
  )
fc2 <- fit2 %>% forecast(h=10)
fc2
  accuracy(fit2)
  #it looks like the AAN model performs just slightly better. This is suprising because the data showed a clear upward trend, maybe the fact that it had a big downward trend in 2008 had a big effect on the accuracy. Still, I wouldve thougth that the additive trend would have benefitted the accuracy. 
  
#I think the first forecast is more accurate because I do not expect China's exports to decrease as time goes on, which is what the second forecast predicts. Clearly the trend is too volatile to be used. Looking at the current data, there was actually an increase in china's exports relative to 2017. 
  
#what do we do with the rmse values?

Exercise 3

chin <- global_economy %>% 
  filter(Country=="China") %>% 
  select(Country, GDP)
autoplot(chin)

## Plot variable not specified, automatically selected `.vars = GDP`

chinbc <- chin %>%
  features(GDP, features = guerrero) %>%
  pull(lambda_guerrero)
chin%>%
  autoplot(box_cox(GDP, chinbc))

fit3 <- chin %>%
  model(ETS(GDP))
fc3 <- fit3 %>%
  forecast(h = 20)
report(fit3)

## Series: GDP 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.9998998 
##     beta  = 0.3119984 
## 
##   Initial states:
##         l[0]       b[0]
##  45713434615 3288256682
## 
##   sigma^2:  0.0108
## 
##      AIC     AICc      BIC 
## 3102.064 3103.218 3112.366

fc3 %>% 
  autoplot(chin) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit3))

fitbc <- chin %>% 
  model(ETS(box_cox(GDP,chinbc)))
fcbc <- fitbc %>% 
  forecast(h=20)
report(fitbc)

## Series: GDP 
## Model: ETS(A,A,N) 
## Transformation: box_cox(GDP, chinbc) 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.07258856 
## 
##   Initial states:
##      l[0]       b[0]
##  16.64919 0.02639696
## 
##   sigma^2:  0.0014
## 
##       AIC      AICc       BIC 
## -138.5192 -137.3653 -128.2170

fcbc %>% 
  autoplot(chin) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fitbc))

fit3a <- chin %>% 
  model(ETS(GDP~error("A")+trend("Ad")+season("N")))
fc3a <- fit3a %>% 
  forecast(h=20)
report(fit3a)

## Series: GDP 
## Model: ETS(A,Ad,N) 
##   Smoothing parameters:
##     alpha = 0.9998747 
##     beta  = 0.5634078 
##     phi   = 0.9799999 
## 
##   Initial states:
##         l[0]       b[0]
##  50284778075 3288256683
## 
##   sigma^2:  3.959258e+22
## 
##      AIC     AICc      BIC 
## 3260.187 3261.834 3272.549

fc3a %>% 
  autoplot(chin) +
  geom_line(aes(y = .fitted), col="#D55E00",
            data = augment(fit3a))

#Holy shit multiplicative errors really do widen your prediction intervals. The box cox model has the lowest Aicc, however I'm not sure that I would use the multiplicative errors, because GDP isnt seasonal. I think the following model will be the best. 

fitbc2 <- chin %>% 
  model(ETS(box_cox(GDP,chinbc)~error("A")+trend("Ad")+season("N")))
fcbc2 <- fitbc2 %>% 
  forecast(h=20)
report(fitbc2)

## Series: GDP 
## Model: ETS(A,Ad,N) 
## Transformation: box_cox(GDP, chinbc) 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.3554223 
##     phi   = 0.9200943 
## 
##   Initial states:
##      l[0]        b[0]
##  16.69879 -0.02541932
## 
##   sigma^2:  0.0015
## 
##       AIC      AICc       BIC 
## -134.8815 -133.2345 -122.5189

fcbc2 %>% 
  autoplot(chin)+
  geom_line(aes(y=.fitted),col="green",
            data=augment(fitbc2))

#even with the additive errors my prediction interval grows so much over time... I trust this one the most though for sure.

For more math examples, please check this website

Exercise 4

gas <- aus_production %>% 
  select(Gas)
autoplot(gas)

## Plot variable not specified, automatically selected `.vars = Gas`

#clearly needs a transformation
gaslambda <- gas %>% 
  features(Gas, features=guerrero) %>% 
  pull(lambda_guerrero)
gas %>%
  autoplot(box_cox(Gas, gaslambda))

#fitted values/ model
gasfit <- gas %>% 
  model(ETS(box_cox(Gas,gaslambda)))
gg_tsresiduals(gasfit)

#autocorrelation, not normally distributed.
report(gasfit)

## Series: Gas 
## Model: ETS(A,A,A) 
## Transformation: box_cox(Gas, gaslambda) 
##   Smoothing parameters:
##     alpha = 0.8678852 
##     beta  = 0.05714432 
##     gamma = 0.0334369 
## 
##   Initial states:
##      l[0]       b[0]       s[0]     s[-1]     s[-2]      s[-3]
##  1.935509 0.01219028 -0.1040305 0.2453879 0.1145641 -0.2559215
## 
##   sigma^2:  0.0064
## 
##       AIC      AICc       BIC 
##  81.64539  82.51077 112.10584

gasfc <- gasfit %>% 
  forecast(h=36)
gasfc %>% 
  autoplot(gas)+
  geom_line(aes(y=.fitted), color = "green",
            data=augment(gasfit))

#if i dont do the box cox, we need to do multiplicative because the level of the seasonality changes over time.
gasfit1 <- gas %>% 
  model(ETS(Gas))
gg_tsresiduals(gasfit1)

#a little heteroskedasticity, a lot of acf, but it looks normally distributed. 
report(gasfit1)

## Series: Gas 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.6528545 
##     beta  = 0.1441675 
##     gamma = 0.09784922 
## 
##   Initial states:
##      l[0]       b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.945592 0.07062881 0.9309236 1.177883 1.074851 0.8163427
## 
##   sigma^2:  0.0032
## 
##      AIC     AICc      BIC 
## 1680.929 1681.794 1711.389

gasfc1 <- gasfit1 %>% 
  forecast(h=36)
gasfc1 %>% 
  autoplot(gas)+
  geom_line(aes(y=.fitted), color = "green",
            data=augment(gasfit1))

gasfit2 <- gas %>% 
  model(ETS(Gas~error("A")+trend("Ad")+season("M")))
gg_tsresiduals(gasfit2)

report(gasfit2)

## Series: Gas 
## Model: ETS(A,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.6100606 
##     beta  = 0.02485618 
##     gamma = 0.2225287 
##     phi   = 0.98 
## 
##   Initial states:
##      l[0]        b[0]      s[0]    s[-1]    s[-2]     s[-3]
##  5.643513 -0.06179413 0.9254594 1.060244 1.124238 0.8900577
## 
##   sigma^2:  18.5452
## 
##      AIC     AICc      BIC 
## 1821.235 1822.297 1855.080

#there is severe heteroskedasticity.not normally distributed. not much autocorrelation tho.  
gasfc2 <- gasfit2 %>% 
  forecast(h=36)
gasfc2 %>% 
  autoplot(gas)+
  geom_line(aes(y=.fitted), color="green",
            data=augment(gasfit2))

###5

set.seed(123333)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
myseries %>% autoplot(Turnover)

#we need to use multiplicative because the level of the seasonality changes with the data. 
fitseries <- myseries %>% 
  model(ETS(Turnover ~ error("M") + trend("A") + season("M")))
fcseries <- fitseries %>% 
  forecast(h=36)
fcseries %>% 
  autoplot(myseries)+
  geom_line(aes(y=.fitted), color="green",
            data=augment(fitseries))

report(fitseries)

## Series: Turnover 
## Model: ETS(M,A,M) 
##   Smoothing parameters:
##     alpha = 0.4385116 
##     beta  = 0.0001345133 
##     gamma = 0.0001015592 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]     s[-5]
##  35.70144 0.4268796 0.9768357 0.8963407 0.9465278 1.352665 1.046269 0.9954085
##      s[-6]     s[-7]     s[-8]    s[-9]    s[-10]    s[-11]
##  0.9707365 0.9830932 0.9726422 0.939769 0.9785438 0.9411683
## 
##   sigma^2:  0.0013
## 
##      AIC     AICc      BIC 
## 3976.603 3978.050 4046.116

fitseries2 <- myseries %>% 
  model(ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
fcseries2 <- fitseries2 %>% 
  forecast(h=36)
fcseries2 %>% 
  autoplot(myseries)+
  geom_line(aes(y=.fitted), color="green",
            data=augment(fitseries2))

report(fitseries2)

## Series: Turnover 
## Model: ETS(M,Ad,M) 
##   Smoothing parameters:
##     alpha = 0.458572 
##     beta  = 0.01128426 
##     gamma = 0.0001001574 
##     phi   = 0.9772211 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]    s[-4]     s[-5]
##  36.02009 0.4814974 0.9821001 0.8995871 0.9443604 1.347288 1.040544 0.9938802
##      s[-6]     s[-7]     s[-8]     s[-9]    s[-10]   s[-11]
##  0.9702889 0.9820467 0.9729883 0.9434179 0.9817436 0.941755
## 
##   sigma^2:  0.0013
## 
##      AIC     AICc      BIC 
## 3989.712 3991.333 4063.315

accuracy(fitseries)

## # A tibble: 1 × 12
##   State Industry .model .type    ME  RMSE   MAE     MPE  MAPE  MASE RMSSE   ACF1
##   <chr> <chr>    <chr>  <chr> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 Sout… Other r… "ETS(… Trai… 0.152  5.45  3.80 -0.0486  2.82 0.458 0.508 0.0440

#RMSE= 5.451617
accuracy(fitseries2)

## # A tibble: 1 × 12
##   State   Industry .model .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
##   <chr>   <chr>    <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
## 1 South … Other r… "ETS(… Trai… 0.482  5.48  3.82 0.325  2.84 0.461 0.511 0.0207

#rsme =5.478495
#i prefer the one with the lower RMSE(1) but Its not a big difference. 

gg_tsresiduals(fitseries)

#looks good to be except we have low levels of ac.
trainms <- myseries %>% 
  slice(1:346)
fitms <- trainms %>% 
  model(ETS(Turnover ~ error("M") + trend("Ad") + season("M")))
fcms <- fitms %>% 
  forecast(h=36)
testms <- myseries %>% 
  slice(347:441)
accuracy(fcms,testms)

## # A tibble: 1 × 12
##   .model    State Industry .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>     <chr> <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 "ETS(Tur… Sout… Other r… Test   1.84  8.56  6.41 0.913  2.92   NaN   NaN 0.225

#8.556635
snfitms <- trainms %>% 
  model(SNAIVE(Turnover))
fcsnfitms <- snfitms %>% 
  forecast(h=36)
accuracy(fcsnfitms,testms)

## # A tibble: 1 × 12
##   .model  State Industry .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
##   <chr>   <chr> <chr>    <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE… Sout… Other r… Test  -0.314  7.93  6.27 -0.275  2.92   NaN   NaN 0.213

#7.932966
#looks like the snaive is a better model.

exercise 6

sumtourism <- tourism %>% 
  summarise(Trips=sum(Trips))
autoplot(sumtourism)

## Plot variable not specified, automatically selected `.vars = Trips`

sumtourism

## # A tsibble: 80 x 2 [1Q]
##    Quarter  Trips
##      <qtr>  <dbl>
##  1 1998 Q1 23182.
##  2 1998 Q2 20323.
##  3 1998 Q3 19827.
##  4 1998 Q4 20830.
##  5 1999 Q1 22087.
##  6 1999 Q2 21458.
##  7 1999 Q3 19914.
##  8 1999 Q4 20028.
##  9 2000 Q1 22339.
## 10 2000 Q2 19941.
## # … with 70 more rows

sumtourism %>%
  model(
    STL(Trips ~ trend(window = 7) +
                   season(window = "periodic"),
    robust = TRUE)) %>% 
   components() %>%
  autoplot()

#clear seasonal and trend.

stletsdamped <- sumtourism %>% model(decomposition_model(
  STL(Trips),
  ETS(season_adjust ~ error("A") + trend("Ad") + season("N"))))
accuracy(stletsdamped)

## # A tibble: 1 × 10
##   .model                 .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>                  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 "decomposition_model(… Trai…  103.  763.  576. 0.367  2.72 0.607 0.629 -0.0174

#763
fcstl <- stletsdamped %>% 
  forecast(h= 8)
fcstl %>% 
  autoplot(sumtourism)+
  geom_line(aes(y=.fitted), color="green",
            data=augment(stletsdamped))

## Warning: Removed 4 row(s) containing missing values (geom_path).

stletsdampeda <- sumtourism %>% model(decomposition_model(
  STL(Trips),
  ETS(season_adjust ~ error("A") + trend("A") + season("N"))))
accuracy(stletsdampeda)

## # A tibble: 1 × 10
##   .model                 .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##   <chr>                  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 "decomposition_model(… Trai…  99.7  763.  574. 0.359  2.71 0.604 0.628 -0.0182

#762
fcstla <- stletsdampeda %>% 
  forecast(h=8)
fcstla %>% 
  autoplot(sumtourism)+
  geom_line(aes(y=.fitted), color="green",
            data=augment(stletsdampeda))

## Warning: Removed 4 row(s) containing missing values (geom_path).

blah <- sumtourism %>% 
  model(ETS(Trips))
report(blah)

## Series: Trips 
## Model: ETS(A,A,A) 
##   Smoothing parameters:
##     alpha = 0.4495675 
##     beta  = 0.04450178 
##     gamma = 0.0001000075 
## 
##   Initial states:
##      l[0]      b[0]      s[0]     s[-1]     s[-2]    s[-3]
##  21689.64 -58.46946 -125.8548 -816.3416 -324.5553 1266.752
## 
##   sigma^2:  699901.4
## 
##      AIC     AICc      BIC 
## 1436.829 1439.400 1458.267

#aaa. 
accuracy(blah)

## # A tibble: 1 × 10
##   .model     .type       ME  RMSE   MAE   MPE  MAPE  MASE RMSSE     ACF1
##   <chr>      <chr>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>
## 1 ETS(Trips) Training  105.  794.  604. 0.379  2.86 0.636 0.653 -0.00151

fcblah <- blah %>% 
  forecast(h=8)
fcblah %>% 
autoplot(sumtourism)+
  geom_line(aes(y=.fitted), color="green",
            data=augment(blah))

#793.6695. 
#looks like the decomposed model is better by like 30 points.
#out of all the graphs, I think the first one with the damped trend is the best because it fits the values much better.

###7

new <- aus_arrivals %>% 
  filter(Origin=="NZ")
autoplot(new)

## Plot variable not specified, automatically selected `.vars = Arrivals`

view(new)
#upward trend, seasonality is increasing with time.

train <- new %>% 
  slice(1:119)
autoplot(train)

## Plot variable not specified, automatically selected `.vars = Arrivals`

fittrain <- train %>% 
  model(ETS(Arrivals~error("M")+trend("A")+season("M")))
fctrain <- fittrain %>% 
  forecast(h=8)
fctrain%>% 
  autoplot(new)+
  geom_line(aes(y=.fitted), color="green",
            data=augment(fittrain))

#it is needed because the lvl of seasonality changes over time.
  #i) an ETS model; ii) a ETS model applied to a log transformed series; iii) a seasonal naive method; iv) an STL decomposition applied to the log transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data.
sestrain <- train %>% 
  model(ETS(Arrivals~error("A")+trend("N")+season("N")))
report(sestrain)

## Series: Arrivals 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.2580434 
## 
##   Initial states:
##      l[0]
##  64589.28
## 
##   sigma^2:  1026511786
## 
##      AIC     AICc      BIC 
## 3041.881 3042.090 3050.219

fcsestrain <- sestrain %>% 
  forecast(h=8)
fcsestrain %>% 
  autoplot(new, level = NULL)+
  geom_line(aes(y=.fitted), color="green",
            data=augment(sestrain))

logtrain <- train %>% 
  model(ETS(log(Arrivals)~error("A")+trend("N")+season("N")))
report(logtrain)

## Series: Arrivals 
## Model: ETS(A,N,N) 
## Transformation: log(Arrivals) 
##   Smoothing parameters:
##     alpha = 0.280708 
## 
##   Initial states:
##     l[0]
##  11.0499
## 
##   sigma^2:  0.0496
## 
##      AIC     AICc      BIC 
## 215.2636 215.4723 223.6010

fclogtrain <- logtrain %>% 
  forecast(h=8)
fclogtrain %>% 
  autoplot(new, level = NULL)+
  geom_line(aes(y=.fitted), color="green",
            data=augment(logtrain))

sntrain <- train %>% 
  model(SNAIVE(Arrivals))
fcsntrain <- sntrain %>% 
  forecast(h=8)
fcsntrain %>% 
  autoplot(new, level = NULL)+
  geom_line(aes(y=.fitted), color="green",
            data=augment(sntrain))

## Warning: Removed 4 row(s) containing missing values (geom_path).

weird <- train %>% 
  model(decomposition_model(STL(log(Arrivals)),ETS(season_adjust)))
fcweird <- weird %>% 
  forecast(h=8)
fcweird %>% 
  autoplot(new, level = NULL)+
  geom_line(aes(y=.fitted), color="green",
            data=augment(weird))

## Warning: Removed 4 row(s) containing missing values (geom_path).

report(sestrain)#3042

## Series: Arrivals 
## Model: ETS(A,N,N) 
##   Smoothing parameters:
##     alpha = 0.2580434 
## 
##   Initial states:
##      l[0]
##  64589.28
## 
##   sigma^2:  1026511786
## 
##      AIC     AICc      BIC 
## 3041.881 3042.090 3050.219

report(logtrain)#215

## Series: Arrivals 
## Model: ETS(A,N,N) 
## Transformation: log(Arrivals) 
##   Smoothing parameters:
##     alpha = 0.280708 
## 
##   Initial states:
##     l[0]
##  11.0499
## 
##   sigma^2:  0.0496
## 
##      AIC     AICc      BIC 
## 215.2636 215.4723 223.6010

report(sntrain)

## Series: Arrivals 
## Model: SNAIVE 
## 
## sigma^2: 321651340.4978

report(weird)#-44

## Series: Arrivals 
## Model: STL decomposition model 
## Transformation: log(Arrivals) 
## Combination: season_adjust + season_year
## 
## ========================================
## 
## Series: season_adjust 
## Model: ETS(A,A,N) 
##   Smoothing parameters:
##     alpha = 0.8101279 
##     beta  = 0.0001000026 
## 
##   Initial states:
##      l[0]       b[0]
##  11.11081 0.01153924
## 
##   sigma^2:  0.0055
## 
##       AIC      AICc       BIC 
## -44.78504 -44.25407 -30.88942 
## 
## Series: season_year 
## Model: SNAIVE 
## 
## sigma^2: 0

gg_tsresiduals(sestrain)

gg_tsresiduals(logtrain)

gg_tsresiduals(sntrain)

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

gg_tsresiduals(weird)

## Warning: Removed 4 row(s) containing missing values (geom_path).

## Warning: Removed 4 rows containing missing values (geom_point).

## Warning: Removed 4 rows containing non-finite values (stat_bin).

#the 4th one is definetly best because it has the lowest aicc and its residuals resemble wn. 
cv <- new %>% 
  stretch_tsibble(.init = 36, .step = 3)
cv

## # A tsibble: 2,511 x 4 [1Q]
## # Key:       .id, Origin [31]
##    Quarter Origin Arrivals   .id
##      <qtr> <chr>     <int> <int>
##  1 1981 Q1 NZ        49140     1
##  2 1981 Q2 NZ        87467     1
##  3 1981 Q3 NZ        85841     1
##  4 1981 Q4 NZ        61882     1
##  5 1982 Q1 NZ        42045     1
##  6 1982 Q2 NZ        63081     1
##  7 1982 Q3 NZ        73275     1
##  8 1982 Q4 NZ        54808     1
##  9 1983 Q1 NZ        41030     1
## 10 1983 Q2 NZ        56155     1
## # … with 2,501 more rows

  cvfit <- cv %>% model(ETS(Arrivals))
  cvfc <- cvfit %>% 
    forecast(h=3)
  accuracy(cvfit)

## # A tibble: 31 × 12
##      .id Origin .model  .type    ME   RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##    <int> <chr>  <chr>   <chr> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
##  1     1 NZ     ETS(Ar… Trai… 1879.  9578. 7477. 1.41   8.94 0.479 0.483 -0.0211
##  2     2 NZ     ETS(Ar… Trai… 1147.  9371. 7352. 0.709  8.82 0.499 0.493 -0.0439
##  3     3 NZ     ETS(Ar… Trai… 1391.  9832. 7833. 0.933  9.18 0.542 0.529  0.0369
##  4     4 NZ     ETS(Ar… Trai… 1355.  9947. 8013. 0.767  9.17 0.541 0.530 -0.0619
##  5     5 NZ     ETS(Ar… Trai… 1624. 10433. 8318. 0.918  9.24 0.563 0.561 -0.0691
##  6     6 NZ     ETS(Ar… Trai… 1516. 10221. 8209. 0.845  8.98 0.545 0.548 -0.0682
##  7     7 NZ     ETS(Ar… Trai… 1066. 10181. 8071. 0.458  8.77 0.556 0.560 -0.0565
##  8     8 NZ     ETS(Ar… Trai… 1209. 10154. 7986. 0.771  8.69 0.574 0.574 -0.0797
##  9     9 NZ     ETS(Ar… Trai… 1273.  9880. 7731. 0.821  8.36 0.548 0.559 -0.0794
## 10    10 NZ     ETS(Ar… Trai… 1701. 10451. 7995. 1.19   8.47 0.522 0.545 -0.0466
## # … with 21 more rows

   cvfitlog <- cv %>% model(ETS(log(Arrivals) ~ error("A") +trend("A") + season("A")))
   cvfclog <- cvfitlog %>% 
     forecast(h=3)
   accuracy(cvfitlog)

## # A tibble: 31 × 12
##      .id Origin .model       .type     ME   RMSE   MAE     MPE  MAPE  MASE RMSSE
##    <int> <chr>  <chr>        <chr>  <dbl>  <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>
##  1     1 NZ     "ETS(log(Ar… Trai… -883.  10340. 7635. -0.457   8.85 0.489 0.522
##  2     2 NZ     "ETS(log(Ar… Trai… -917.   9897. 7228. -0.535   8.39 0.491 0.520
##  3     3 NZ     "ETS(log(Ar… Trai…  -96.4  9934. 8056. -0.591   9.41 0.558 0.535
##  4     4 NZ     "ETS(log(Ar… Trai…  298.   9731. 7915. -0.379   9.11 0.535 0.519
##  5     5 NZ     "ETS(log(Ar… Trai…   35.2 10672. 8275. -0.0333  9.20 0.560 0.574
##  6     6 NZ     "ETS(log(Ar… Trai…   47.4 10175. 8172. -0.562   9.10 0.543 0.546
##  7     7 NZ     "ETS(log(Ar… Trai…   58.8 10233. 8308. -0.605   9.15 0.573 0.563
##  8     8 NZ     "ETS(log(Ar… Trai… -195.  10200. 8237. -0.762   8.99 0.592 0.577
##  9     9 NZ     "ETS(log(Ar… Trai…  258.   9781. 7663. -0.245   8.31 0.543 0.553
## 10    10 NZ     "ETS(log(Ar… Trai…   43.8 10335. 7996. -0.524   8.51 0.522 0.539
## # … with 21 more rows, and 1 more variable: ACF1 <dbl>

    cvfitsn <- cv %>% model(SNAIVE(Arrivals))
    cvfcsn <- cvfitsn %>% 
      forecast(h=3)
    accuracy(cvfitsn)

## # A tibble: 31 × 12
##      .id Origin .model   .type    ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
##    <int> <chr>  <chr>    <chr> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
##  1     1 NZ     SNAIVE(… Trai… 5156. 19812. 15599.  3.84  17.2     1     1 0.670
##  2     2 NZ     SNAIVE(… Trai… 4255. 19020. 14721.  3.02  16.2     1     1 0.678
##  3     3 NZ     SNAIVE(… Trai… 4031. 18575. 14450.  2.96  15.8     1     1 0.654
##  4     4 NZ     SNAIVE(… Trai… 4429. 18751. 14802.  3.00  15.8     1     1 0.604
##  5     5 NZ     SNAIVE(… Trai… 3709. 18584. 14775.  2.44  15.5     1     1 0.583
##  6     6 NZ     SNAIVE(… Trai… 4698. 18650. 15057.  3.28  15.6     1     1 0.589
##  7     7 NZ     SNAIVE(… Trai… 4062. 18170. 14508.  2.78  14.9     1     1 0.571
##  8     8 NZ     SNAIVE(… Trai… 3797. 17677. 13918.  2.64  14.3     1     1 0.578
##  9     9 NZ     SNAIVE(… Trai… 4537. 17675. 14115.  3.14  14.2     1     1 0.582
## 10    10 NZ     SNAIVE(… Trai… 6213  19185. 15304.  4.15  14.6     1     1 0.616
## # … with 21 more rows

cvfitdc <- cv %>%  model(decomposition_model(STL(log(Arrivals)),ETS(season_adjust)))
 cvfcdc <- cvfitdc %>% 
   forecast(h=3)
 accuracy(cvfitdc)

## # A tibble: 31 × 12
##      .id Origin .model   .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
##    <int> <chr>  <chr>    <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
##  1     1 NZ     decompo… Trai… 2030. 9076. 6992.  1.75  8.31 0.448 0.458 -0.0848
##  2     2 NZ     decompo… Trai… 1453. 8884. 6879.  1.10  8.15 0.467 0.467 -0.0758
##  3     3 NZ     decompo… Trai… 1528. 8995. 7072.  1.23  8.23 0.489 0.484 -0.0932
##  4     4 NZ     decompo… Trai… 1469. 9279. 7485.  1.05  8.46 0.506 0.495 -0.141 
##  5     5 NZ     decompo… Trai… 1800. 9432. 7425.  1.34  8.18 0.503 0.508 -0.129 
##  6     6 NZ     decompo… Trai… 1786. 9125. 7070.  1.31  7.75 0.470 0.489 -0.132 
##  7     7 NZ     decompo… Trai… 1370. 8875. 6857.  1.00  7.48 0.473 0.488 -0.124 
##  8     8 NZ     decompo… Trai… 1568. 8715. 6801.  1.19  7.34 0.489 0.493 -0.142 
##  9     9 NZ     decompo… Trai… 1643. 8492. 6590.  1.25  7.05 0.467 0.480 -0.142 
## 10    10 NZ     decompo… Trai… 2129. 9115. 6875.  1.57  7.13 0.449 0.475 -0.143 
## # … with 21 more rows

#it looks like the best one is the additive log.

###8

port <- aus_production %>% 
  select(Cement)
port

## # A tsibble: 218 x 2 [1Q]
##    Cement Quarter
##     <dbl>   <qtr>
##  1    465 1956 Q1
##  2    532 1956 Q2
##  3    561 1956 Q3
##  4    570 1956 Q4
##  5    529 1957 Q1
##  6    604 1957 Q2
##  7    603 1957 Q3
##  8    582 1957 Q4
##  9    554 1958 Q1
## 10    620 1958 Q2
## # … with 208 more rows

cv1 <- port %>% 
  stretch_tsibble(.init = 5, .step = 1)
cvets <- cv1 %>% 
  model(ETS(Cement))
fccvets <- cvets %>% 
  forecast(h=4)
fccvets

## # A fable: 856 x 5 [1Q]
## # Key:     .id, .model [214]
##      .id .model      Quarter       Cement .mean
##    <int> <chr>         <qtr>       <dist> <dbl>
##  1     1 ETS(Cement) 1957 Q2 N(531, 2261)  531.
##  2     1 ETS(Cement) 1957 Q3 N(531, 2261)  531.
##  3     1 ETS(Cement) 1957 Q4 N(531, 2261)  531.
##  4     1 ETS(Cement) 1958 Q1 N(531, 2261)  531.
##  5     2 ETS(Cement) 1957 Q3 N(543, 2794)  543.
##  6     2 ETS(Cement) 1957 Q4 N(543, 2794)  543.
##  7     2 ETS(Cement) 1958 Q1 N(543, 2794)  543.
##  8     2 ETS(Cement) 1958 Q2 N(543, 2794)  543.
##  9     3 ETS(Cement) 1957 Q4 N(552, 2842)  552.
## 10     3 ETS(Cement) 1958 Q1 N(552, 2842)  552.
## # … with 846 more rows

report(cvets)

## Warning in report.mdl_df(cvets): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.

## # A tibble: 214 × 10
##      .id .model          sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE     MAE
##    <int> <chr>            <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
##  1     1 ETS(Cement)    0.00801   -22.1  50.1  74.1  48.9 1356.  913.  0.0518
##  2     2 ETS(Cement)    0.00947   -28.0  61.9  73.9  61.3 1863. 1371.  0.0641
##  3     3 ETS(Cement)    0.00933   -33.5  72.9  80.9  72.8 2030. 1461.  0.0673
##  4     4 ETS(Cement) 2135.        -37.8  81.7  87.7  81.9 1601. 2714. 35.2   
##  5     5 ETS(Cement)    0.00694   -43.3  92.5  97.3  93.1 1667. 1136.  0.0569
##  6     6 ETS(Cement)    0.00742   -49.2 104.  108.  105.  1874. 1321.  0.0601
##  7     7 ETS(Cement) 2093.        -54.1 114.  118.  115.  1712. 2680. 36.5   
##  8     8 ETS(Cement) 1884.        -59.1 124.  127.  126.  1570. 2631. 33.8   
##  9     9 ETS(Cement) 2069.        -65.2 136.  139.  138.  1751. 2373. 38.3   
## 10    10 ETS(Cement)    0.00426   -67.0 144.  151.  147.  1032.  993.  0.0474
## # … with 204 more rows

cvsn <- cv1 %>% 
  model(SNAIVE(Cement))
fccvsn <- cvsn %>% 
  forecast(h=4)
fccvsn

## # A fable: 856 x 5 [1Q]
## # Key:     .id, .model [214]
##      .id .model         Quarter       Cement .mean
##    <int> <chr>            <qtr>       <dist> <dbl>
##  1     1 SNAIVE(Cement) 1957 Q2 N(532, 4096)   532
##  2     1 SNAIVE(Cement) 1957 Q3 N(561, 4096)   561
##  3     1 SNAIVE(Cement) 1957 Q4 N(570, 4096)   570
##  4     1 SNAIVE(Cement) 1958 Q1 N(529, 4096)   529
##  5     2 SNAIVE(Cement) 1957 Q3 N(561, 4640)   561
##  6     2 SNAIVE(Cement) 1957 Q4 N(570, 4640)   570
##  7     2 SNAIVE(Cement) 1958 Q1 N(529, 4640)   529
##  8     2 SNAIVE(Cement) 1958 Q2 N(604, 4640)   604
##  9     3 SNAIVE(Cement) 1957 Q4 N(570, 3681)   570
## 10     3 SNAIVE(Cement) 1958 Q1 N(529, 3681)   529
## # … with 846 more rows

report(cvsn)

## Warning in report.mdl_df(cvsn): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.

## # A tibble: 214 × 3
##      .id .model         sigma2
##    <int> <chr>           <dbl>
##  1     1 SNAIVE(Cement)    NA 
##  2     2 SNAIVE(Cement)    32 
##  3     3 SNAIVE(Cement)   241.
##  4     4 SNAIVE(Cement)   721 
##  5     5 SNAIVE(Cement)   642 
##  6     6 SNAIVE(Cement)   635.
##  7     7 SNAIVE(Cement)   532.
##  8     8 SNAIVE(Cement)   488.
##  9     9 SNAIVE(Cement)   481 
## 10    10 SNAIVE(Cement)   448.
## # … with 204 more rows

#b LOL!!!!!
port2 <- port %>%
  slice(1:(n()-4)) %>%
  stretch_tsibble(.init = 5, .step = 4)

portcomp2 <- port2 %>%
  model(ETS(Cement),
        SNAIVE(Cement)) %>%
  forecast(h = "1 year")

cement_mse <- accuracy(portcomp2, port) %>%
  mutate(MSE = (RMSE)^2) %>%
  select(MSE, RMSE)
cement_mse

## # A tibble: 2 × 2
##      MSE  RMSE
##    <dbl> <dbl>
## 1  9781.  98.9
## 2 17839. 134.

cement_mse %>%
  add_column(Model = c('ETS', 'SNAIVE'))

## # A tibble: 2 × 3
##      MSE  RMSE Model 
##    <dbl> <dbl> <chr> 
## 1  9781.  98.9 ETS   
## 2 17839. 134.  SNAIVE

cement_final <- cement_mse %>%
  add_column(Model = c('ETS', 'SNAIVE')) %>%
  select(Model, MSE, RMSE)

HW 5

Matt Mullis

11/5/22

Load packages and data

Questions

Exercise 1

Exercise 2

Exercise 3

Exercise 4

exercise 6