Load packages and data

library(fpp3)
library(readxl)

Exercise 1

  1. They all show the same number of lags, but they have different critical values. The critical values get progressively smaller from left to right as the number of observations grows, because the bounds are roughly ±1.96/√T. With more observations, each sample autocorrelation is estimated more precisely.

I believe they all resemble white noise because no autocorrelation spike crosses the critical bounds at any lag. Since the numbers are truly random, the series should resemble white noise to begin with.

  2. When more observations are included, the critical values decrease (again, roughly ±1.96/√T), so smaller sample autocorrelations become distinguishable from zero. Each lag's autocorrelation is estimated from more data, which makes the test more sensitive, as the sketch below shows.
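As a quick sketch (assuming the three panels use roughly 36, 360, and 1,000 random numbers, as in the textbook figure), the bounds can be computed directly:

# Critical bounds for the ACF of white noise are approximately +/- 1.96/sqrt(T)
sapply(c(36, 360, 1000), function(n) 1.96 / sqrt(n))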

Exercise 2

amazon.stock <- gafa_stock %>%
  filter(Symbol == "AMZN") %>%
  select(Date, Close)

amazon.stock %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = Close)) +
  labs(title = "Amazon Stock Price Over Time", y = "Closing Price")

amazon.stock %>%
  gg_tsdisplay(Close, plot_type = 'partial')
## Warning: Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.
## Provided data has an irregular interval, results should be treated with caution. Computing ACF by observation.

amazon.stock %>%
  features(Close, unitroot_kpss)
## # A tibble: 1 x 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1      14.0        0.01
amazon.stock %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = difference(Close))) +
  labs(title = "Amazon Differenced Stock Price Over Time", y = "Differenced Closing Price")
## Warning: Removed 1 row(s) containing missing values (geom_path).

#Amazon stock is clearly not stationary: there is a pronounced upward trend, so the series is not horizontal. The ACF shows significant autocorrelation at every lag from 1 through 30, which makes sense given how stock prices behave. The KPSS test above confirms this: the reported p-value of 0.01 (the smallest the test reports) is below 0.05, so we reject the null hypothesis of stationarity.
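#As a follow-up sketch, unitroot_ndiffs reports how many first differences
#are needed; for a random-walk-like price series we would expect 1:

amazon.stock %>%
  features(Close, unitroot_ndiffs)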

Exercise 3

turkey.gdp <- global_economy %>%
  filter(Country == "Turkey") %>%
  mutate(GDP = (GDP/1000000)) %>%
  select(Year, GDP) %>%
  na.omit()

turkey.gdp %>%
  ggplot(aes(x = Year, y = GDP)) +
  geom_line(aes(y = GDP)) +
  labs(y = 'GDP in Millions', title = "Turkey's GDP over Time")

turkey.gdp %>%
  gg_tsdisplay(GDP, plot_type = 'partial')

turkey.guerro <- turkey.gdp %>%
  features(GDP, features = guerrero)

turkey.box.cox <- turkey.gdp %>%
  mutate(GDP = box_cox(GDP, turkey.guerro$lambda_guerrero))

turkey.box.cox %>%
  features(GDP, unitroot_ndiffs)
## # A tibble: 1 x 1
##   ndiffs
##    <int>
## 1      1
turkey.diff <- turkey.box.cox %>%
  mutate(GDP = difference(GDP)) %>%
  na.omit()


turkey.diff %>%
  ggplot(aes(x = Year, y = GDP)) +
  geom_line(aes(y = GDP)) +
  labs(y = 'Box-Cox and Differenced GDP', title = "Turkey's GDP After Box-Cox and Differencing")

turkey.diff %>%
  features(GDP, unitroot_kpss)
## # A tibble: 1 x 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1    0.0889         0.1
#A KPSS p-value above 0.05 (it is reported as 0.1, where the test truncates) means we fail to reject the null hypothesis of stationarity, so the transformed and differenced series is stationary!



#Backshift notation: (1 - B)w_t, where w_t is the Box-Cox-transformed GDP.
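#Quick check (a sketch): difference() computes exactly (1 - B)w_t,
#i.e. each value minus the previous one:
all.equal(
  difference(turkey.box.cox$GDP),
  turkey.box.cox$GDP - dplyr::lag(turkey.box.cox$GDP)
)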

#b


tasmania.takings <- aus_accommodation %>%
  filter(State == "Tasmania") %>%
  select(Date, Takings)

tasmania.takings %>%
  ggplot(aes(x = Date, y = Takings)) +
  geom_line(aes(y = Takings)) +
  labs(y = 'Takings', title = "Tasmanian Takings Over Time")

#Does not look like a Box-Cox transformation will be necessary. The variance is fairly stable (homoskedastic) over time.


tasmania.takings %>%
  gg_tsdisplay(Takings, plot_type = 'partial')

tasmania.takings %>%
  features(Takings, unitroot_nsdiffs)
## # A tibble: 1 x 1
##   nsdiffs
##     <int>
## 1       1
tasmania.diff <- tasmania.takings %>%
  mutate(Takings = difference(Takings, 4)) %>%
  na.omit()

tasmania.diff %>%
  features(Takings, unitroot_ndiffs)
## # A tibble: 1 x 1
##   ndiffs
##    <int>
## 1      0
#Looks like I will need to do a seasonal difference, but no first difference is necessary. 


tasmania.diff %>%
  ggplot(aes(x = Date, y = Takings)) +
  geom_line(aes(y = Takings)) +
  labs(y = 'Seasonally Differenced Takings', title = "Tasmania's Seasonally Differenced Takings Over Time")

tasmania.diff %>%
  features(Takings, unitroot_kpss)
## # A tibble: 1 x 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1     0.196         0.1
#A KPSS p-value above 0.05 means we fail to reject the null of stationarity, so the seasonally differenced series is stationary!


#Back shift notation: (1 - B^4)y_t, since only a lag-4 seasonal difference was applied.
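#Quick check (a sketch): the lag-4 seasonal difference (1 - B^4)y_t is
#each value minus its value four quarters earlier:
all.equal(
  difference(tasmania.takings$Takings, 4),
  tasmania.takings$Takings - dplyr::lag(tasmania.takings$Takings, 4)
)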




#c

souvenirs %>%
  ggplot(aes(x = Month, y = Sales)) +
  geom_line(aes(y = Sales)) +
  labs(y = 'Sales', title = "Monthly Souvenir Sales")

souvenirs %>%
  gg_tsdisplay(Sales, plot_type = 'partial')

souvenirs.guerro <- souvenirs %>%
  features(Sales, features = guerrero)

souvenirs.box.cox <- souvenirs %>%
  mutate(Sales = box_cox(Sales, souvenirs.guerro$lambda_guerrero))


souvenirs.box.cox %>%
  features(Sales, unitroot_nsdiffs)
## # A tibble: 1 x 1
##   nsdiffs
##     <int>
## 1       1
souvenirs.diff <- souvenirs.box.cox %>%
  mutate(Sales = difference(Sales, 12)) %>%
  na.omit()

souvenirs.diff %>%
  features(Sales, unitroot_ndiffs)
## # A tibble: 1 x 1
##   ndiffs
##    <int>
## 1      0
#I will need to do a seasonal difference. The first difference is no longer necessary. 



souvenirs.diff %>%
  ggplot(aes(x = Month, y = Sales)) +
  geom_line(aes(y = Sales)) +
  labs(y = 'Seasonally Differenced Sales', title = "Seasonally Differenced Souvenir Sales Over Time")

#Now it looks relatively stationary. 

souvenirs.diff %>%
  features(Sales, unitroot_kpss)
## # A tibble: 1 x 2
##   kpss_stat kpss_pvalue
##       <dbl>       <dbl>
## 1     0.240         0.1
#A KPSS p-value above 0.05 means we fail to reject the null of stationarity, so the transformed, seasonally differenced series is stationary!




#Back shift notation: (1 - B^12)w_t, where w_t is the Box-Cox-transformed Sales.

Exercise 4

#a and b)

ar1 <- function(phi, n = 100) {
  # Simulate an AR(1) process: y_t = phi * y_{t-1} + e_t, starting from y_1 = 0
  y <- numeric(n)
  e <- rnorm(n)
  for (i in 2:n) {
    y[i] <- phi * y[i - 1] + e[i]
  }
  tsibble(idx = seq_len(n), y = y, index = idx)
}
ar1(0.6) %>%
  autoplot() +
  labs(title = "AR1(0.6)")
## Plot variable not specified, automatically selected `.vars = y`

ar1(0) %>%
  autoplot() +
  labs(title = "AR1(0)")
## Plot variable not specified, automatically selected `.vars = y`

ar1(-1) %>%
  autoplot() +
  labs(title = "AR1(-1)")
## Plot variable not specified, automatically selected `.vars = y`

ar1(1) %>%
  autoplot() +
  labs(title = "AR1(1)")
## Plot variable not specified, automatically selected `.vars = y`

#As I change phi the behaviour of the series changes. When phi = -1 it looks like a sound wave: large fluctuations with constant sign changes, and it is not stationary. When phi = 0 the data is pure white noise. When phi = 0.6 the series looks stationary, which is what we want. When phi = 1 we get a random walk: trend-like wandering with an element of randomness.
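#Sketch: for |phi| < 1 an AR(1) is stationary with variance
#sigma^2 / (1 - phi^2); a long simulation with stats::arima.sim agrees:
set.seed(123)
sim <- as.numeric(arima.sim(model = list(ar = 0.6), n = 1e5))
c(empirical = var(sim), theoretical = 1 / (1 - 0.6^2))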




#c and d)

ma1 <- function(theta = 0.6) {
  # Simulate an MA(1) process: y_t = e_t + theta * e_{t-1},
  # using a single shared error series
  y <- ts(numeric(100))
  e <- rnorm(100)
  for (i in 2:100) {
    y[i] <- theta * e[i - 1] + e[i]
  }
  y
}

ma1(0.6) %>%
  as_tsibble() %>%
  autoplot() +
  labs(title = "MA1(0.6)")
## Plot variable not specified, automatically selected `.vars = value`

ma1(0) %>%
  as_tsibble() %>%
  autoplot() +
  labs(title = "MA1(0)")
## Plot variable not specified, automatically selected `.vars = value`

ma1(1) %>%
  as_tsibble() %>%
  autoplot() +
  labs(title = "MA1(1)")
## Plot variable not specified, automatically selected `.vars = value`

ma1(-1) %>%
  as_tsibble() %>%
  autoplot() +
  labs(title = "MA1(-1)")
## Plot variable not specified, automatically selected `.vars = value`

#It is hard to see the effect of theta by eye: all four series resemble white noise. The differences show up in the lag-1 autocorrelation, which for an MA(1) is theta / (1 + theta^2), and in the variance, (1 + theta^2) * sigma^2, which is why the y-axis range shifts between the plots.
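#Sketch: comparing the theoretical lag-1 autocorrelation against a long
#simulated MA(1) series makes the effect of theta concrete:
set.seed(123)
sim <- as.numeric(arima.sim(model = list(ma = 0.6), n = 1e5))
c(empirical = acf(sim, plot = FALSE)$acf[2],
  theoretical = 0.6 / (1 + 0.6^2))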


#e)


arma1 <- function(alpha = 0.6, theta = 0.6) {
  # Simulate an ARMA(1,1) process:
  # y_t = alpha * y_{t-1} + theta * e_{t-1} + e_t, with one shared error series
  y <- ts(numeric(100))
  e <- rnorm(100)
  for (i in 2:100) {
    y[i] <- alpha * y[i - 1] + theta * e[i - 1] + e[i]
  }
  y
}
arma1() %>%
  as_tsibble() %>%
  autoplot() +
  labs(title = "ARMA(1,1)")
## Plot variable not specified, automatically selected `.vars = value`

#f)

ar2 <- function(alpha1 = -0.8, alpha2 = 0.3) {
  # Simulate an AR(2) process: y_t = alpha1 * y_{t-1} + alpha2 * y_{t-2} + e_t
  y <- ts(numeric(100))
  e <- rnorm(100)
  for (i in 3:100) {
    y[i] <- alpha1 * y[i - 1] + alpha2 * y[i - 2] + e[i]
  }
  y
}

ar2() %>%
  as_tsibble() %>%
  autoplot() +
  labs(title = "AR(2)")
## Plot variable not specified, automatically selected `.vars = value`

#g)

# The AR(2) series is not stationary: these parameters violate the AR(2) stationarity conditions, so it oscillates with exponentially growing amplitude rather than settling around a mean. The ARMA(1,1) series appears stationary, with no clear trend or seasonality.
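#Sketch: AR(2) stationarity requires phi1 + phi2 < 1, phi2 - phi1 < 1,
#and |phi2| < 1; with phi1 = -0.8 and phi2 = 0.3 the second condition
#fails, which is why the series explodes while oscillating:
phi1 <- -0.8; phi2 <- 0.3
c(phi1 + phi2 < 1, phi2 - phi1 < 1, abs(phi2) < 1)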

Exercise 5

aus_airpassengers %>%
  filter(Year < 2012)
## # A tsibble: 42 x 2 [1Y]
##     Year Passengers
##    <dbl>      <dbl>
##  1  1970       7.32
##  2  1971       7.33
##  3  1972       7.80
##  4  1973       9.38
##  5  1974      10.7 
##  6  1975      11.1 
##  7  1976      10.9 
##  8  1977      11.3 
##  9  1978      12.1 
## 10  1979      13.0 
## # ... with 32 more rows
## # i Use `print(n = ...)` to see more rows
airpassengers.auto <- aus_airpassengers %>%
  model(
    'Auto' = ARIMA(Passengers)
  )

airpassengers.auto %>%
  gg_tsresiduals()

augment(airpassengers.auto) %>%
  features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 x 3
##   .model lb_stat lb_pvalue
##   <chr>    <dbl>     <dbl>
## 1 Auto      6.70     0.754
airpassengers.auto %>%
  forecast(h = 10) %>%
  autoplot(aus_airpassengers)

#b)


#Back shift notation: (1 - B)^2 y_t = (1 + theta1 B)e_t; the left-hand side expands to (1 - 2B + B^2)y_t. (The automatically selected model here is an ARIMA(0,2,1).)
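#Quick check (a sketch): (1 - B)^2 y_t expands to y_t - 2y_{t-1} + y_{t-2}:
p <- aus_airpassengers$Passengers
all.equal(difference(difference(p)),
          p - 2 * dplyr::lag(p) + dplyr::lag(p, 2))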




#c)

airpassengers.010 <- aus_airpassengers %>%
  model(
    ARIMA(Passengers ~ pdq(0,1,0))
  )

airpassengers.010 %>%
  gg_tsresiduals()

augment(airpassengers.010) %>%
  features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 x 3
##   .model                           lb_stat lb_pvalue
##   <chr>                              <dbl>     <dbl>
## 1 ARIMA(Passengers ~ pdq(0, 1, 0))    6.77     0.747
airpassengers.010 %>%
  forecast(h = 10) %>%
  autoplot(aus_airpassengers)

#d)

airpassengers.212 <- aus_airpassengers %>%
  model(ARIMA(Passengers ~ 1 + pdq(2,1,2)))

airpassengers.212 %>%
  forecast(h = 10) %>%
  autoplot(aus_airpassengers)

airpassengers.noc <- aus_airpassengers %>%
  model(ARIMA(Passengers ~ 0 + pdq(2,1,2)))
## Warning: 1 error encountered for ARIMA(Passengers ~ 0 + pdq(2, 1, 2))
## [1] non-stationary AR part from CSS
airpassengers.noc %>%
  forecast(h = 10) %>%
  autoplot(aus_airpassengers)
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 10 row(s) containing missing values (geom_path).

#Without the constant the ARIMA(2,1,2) could not be estimated (note the 'non-stationary AR part from CSS' warning), so there is no fitted model and nothing to forecast. For this specification the constant appears to be necessary.



#e)

airpassengers.021 <- aus_airpassengers %>%
  model(
    ARIMA(Passengers ~ 1 + pdq(0,2,1)))
## Warning: Model specification induces a quadratic or higher order polynomial trend. 
## This is generally discouraged, consider removing the constant or reducing the number of differences.
airpassengers.021 %>%
  forecast(h = 10) %>%
  autoplot(aus_airpassengers)

#Including a constant with d = 2 induces a quadratic (or higher-order) trend, as the warning notes, so the forecast values are a lot higher and the trend is steeper than without the constant.

Exercise 6

best.country.gdp <- global_economy %>%
  filter(Code == "USA") %>%
  mutate(GDP = (GDP/1000000000000)) %>%
  select(Year, GDP)

best.country.gdp %>%
  ggplot(aes(x = Year, y = GDP)) +
  geom_line(aes(y = GDP)) +
  labs(title = "US GDP Since 1960", y = "GDP in Trillions")

best.country.guerro <- best.country.gdp %>%
  features(GDP, features = guerrero)

best.country.boxcox <- best.country.gdp %>%
  mutate(GDP = box_cox(GDP, best.country.guerro$lambda_guerrero))

best.country.boxcox %>%
  autoplot()
## Plot variable not specified, automatically selected `.vars = GDP`

#Clearly no need for a Box-Cox transformation here.


#b)

best.country.gdp %>%
  gg_tsdisplay(GDP, plot_type = 'partial')

best.country.gdp %>%
  features(GDP, unitroot_ndiffs)
## # A tibble: 1 x 1
##   ndiffs
##    <int>
## 1      2
best.country.auto <- best.country.gdp %>%
  model(
    'Auto' = ARIMA(GDP)
  )

best.country.auto %>%
  report()
## Series: GDP 
## Model: ARIMA(0,2,2) 
## 
## Coefficients:
##           ma1      ma2
##       -0.4206  -0.3048
## s.e.   0.1197   0.1078
## 
## sigma^2 estimated as 0.02615:  log likelihood=23.26
## AIC=-40.52   AICc=-40.06   BIC=-34.45
#Looks like a suitable ARIMA model.



#c

best.country.compare <- best.country.gdp %>%
  model(
    'Auto' = ARIMA(GDP),
    'Diffs Only' = ARIMA(GDP ~ pdq(0,2,0)),
    'Autoregression + Diff' = ARIMA(GDP ~ pdq(1,2,0)),
    "Moving Average + Diff" = ARIMA(GDP ~ pdq(0,2,1)),
    "AR + Diff + MA" = ARIMA(GDP ~ pdq(1,2,1))
  )

best.country.compare %>%
  report()
## Warning in report.mdl_df(.): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 5 x 8
##   .model                sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>                  <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 Auto                  0.0261    23.3 -40.5 -40.1 -34.4 <cpl [0]> <cpl [2]>
## 2 Diffs Only            0.0310    17.8 -33.5 -33.5 -31.5 <cpl [0]> <cpl [0]>
## 3 Autoregression + Diff 0.0306    18.6 -33.3 -33.0 -29.2 <cpl [1]> <cpl [0]>
## 4 Moving Average + Diff 0.0292    19.8 -35.6 -35.3 -31.5 <cpl [0]> <cpl [1]>
## 5 AR + Diff + MA        0.0263    23.1 -40.2 -39.7 -34.1 <cpl [1]> <cpl [1]>
#The auto ARIMA model produced the smallest AICc, which is to be expected, but the (1,2,1) was hot on its heels.
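#Sketch: sorting the glance() output makes the AICc ranking explicit:
glance(best.country.compare) %>%
  select(.model, AICc) %>%
  arrange(AICc)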

#d

best.country.all <- best.country.gdp %>%
  model(
    "AR + Diff + MA" = ARIMA(GDP ~ pdq(1,2,1))
  )
best.country.auto %>%
  gg_tsresiduals()

best.country.all %>%
  gg_tsresiduals()

augment(best.country.auto) %>%
  features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 x 3
##   .model lb_stat lb_pvalue
##   <chr>    <dbl>     <dbl>
## 1 Auto      12.2     0.273
augment(best.country.all) %>%
  features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 x 3
##   .model         lb_stat lb_pvalue
##   <chr>            <dbl>     <dbl>
## 1 AR + Diff + MA    14.1     0.169
#Both sets of residuals actually look consistent with white noise. The Ljung-Box p-values (0.273 for the auto model, 0.169 for the (1,2,1)) are both above 0.05, so we fail to reject the null hypothesis of white-noise residuals. The (1,2,1) does show a borderline ACF spike at lag 7, although you could argue one marginal spike out of many lags does not really matter. I am going to choose the (1,2,1) since its AICc was essentially as good as the auto model's.



#e)


best.country.all %>%
  forecast(h = 10) %>%
  autoplot(best.country.gdp)

#This forecast looks completely feasible. A recession could certainly throw a wrinkle in the forecast, but those are nearly impossible to predict. So, I am going to conclude that this looks like a good forecast. 



#f)


best.country.ets <- best.country.gdp %>%
  model('Auto' = ETS(GDP),
        'Multiplicative Trend' = ETS(GDP ~ error("A") + trend("M") + season("N"))
        )
best.country.ets %>%
  report()
## Warning in report.mdl_df(.): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 2 x 9
##   .model                 sigma2 log_lik   AIC  AICc   BIC    MSE  AMSE    MAE
##   <chr>                   <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>
## 1 Auto                 0.000455    23.8 -37.7 -36.5 -27.4 0.0280 0.128 0.0169
## 2 Multiplicative Trend 0.0318     -15.7  41.4  42.6  51.7 0.0296 0.137 0.0983
best.country.ets %>%
  forecast(h = 10) %>%
  autoplot(best.country.gdp)

#Within the ETS family the auto-selected model has by far the lower AICc. Note, though, that AICc values cannot be compared between ARIMA and ETS models, since they are different model classes. The multiplicative trend has a nice exponential curve that I think suits GDP growth well, but I will still conclude that the ARIMA model is likely the better choice; see the cross-validated comparison sketched below.
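#Since AICc cannot be compared across model classes, a cross-validated
#one-step comparison (a sketch, mirroring the approach used in Exercise 8h)
#is a fairer test:
best.country.gdp %>%
  stretch_tsibble(.init = 20) %>%
  model(
    'ARIMA' = ARIMA(GDP ~ pdq(1,2,1)),
    'ETS' = ETS(GDP)
  ) %>%
  forecast(h = 1) %>%
  accuracy(best.country.gdp)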

Exercise 7

#a
japan <- aus_arrivals %>%
  filter(Origin == "Japan")

autoplot(japan)  
## Plot variable not specified, automatically selected `.vars = Arrivals`

#Data does not appear to be stationary. Going to have to take a difference.



#b 

japan %>%
  gg_tsdisplay(difference(Arrivals, 4) %>% difference(),
               plot_type = "partial", lag = 16)
## Warning: Removed 5 row(s) containing missing values (geom_path).
## Warning: Removed 5 rows containing missing values (geom_point).

#Some heteroskedasticity for sure, but the differenced series looks stationary nonetheless. The big spike at lag 4 in both the ACF and PACF makes complete sense given the quarterly seasonality.



#c

#The ACF shows that there is autocorrelation at lag 4 and this is indicative of quarterly seasonality. 



#d 

#The PACF shows that, even after controlling for the shorter lags, significant partial autocorrelation remains. It again appears at lag 4, and also at lags 8 and 12, i.e. one, two, and three years back, indicating strong seasonality.



#e

#The significant spike in the ACF at lag 4 suggests a seasonal MA(1) component for sure. I would go with an ARIMA(0,1,1)(1,1,1)[4] or an ARIMA(1,1,0)(1,1,1)[4]; we'll compare AICc and see which is better (see the side-by-side sketch below).
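#Sketch: as a compact alternative to the separate fits below, both
#candidates can be compared in a single model() call:
japan %>%
  model(
    'ma' = ARIMA(Arrivals ~ pdq(0,1,1) + PDQ(1,1,1)),
    'ar' = ARIMA(Arrivals ~ pdq(1,1,0) + PDQ(1,1,1))
  ) %>%
  glance() %>%
  select(.model, AICc)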


#f

japan.fit <- japan %>%
  model(ARIMA(Arrivals))

report(japan.fit)
## Series: Arrivals 
## Model: ARIMA(0,1,1)(1,1,1)[4] 
## 
## Coefficients:
##           ma1     sar1     sma1
##       -0.4228  -0.2305  -0.5267
## s.e.   0.0944   0.1337   0.1246
## 
## sigma^2 estimated as 174801727:  log likelihood=-1330.66
## AIC=2669.32   AICc=2669.66   BIC=2680.54
japan.arima <- japan %>%
  model(ARIMA(Arrivals ~ pdq(0,1,1) + PDQ(1,1,1)))

report(japan.arima)
## Series: Arrivals 
## Model: ARIMA(0,1,1)(1,1,1)[4] 
## 
## Coefficients:
##           ma1     sar1     sma1
##       -0.4228  -0.2305  -0.5267
## s.e.   0.0944   0.1337   0.1246
## 
## sigma^2 estimated as 174801727:  log likelihood=-1330.66
## AIC=2669.32   AICc=2669.66   BIC=2680.54
japan.arima2 <- japan %>%
  model(ARIMA(Arrivals ~ pdq(1,1,0) + PDQ(1,1,1)))

report(japan.arima2)
## Series: Arrivals 
## Model: ARIMA(1,1,0)(1,1,1)[4] 
## 
## Coefficients:
##           ar1     sar1     sma1
##       -0.3091  -0.2129  -0.5534
## s.e.   0.0889   0.1321   0.1224
## 
## sigma^2 estimated as 180712140:  log likelihood=-1332.66
## AIC=2673.32   AICc=2673.67   BIC=2684.54
#The ARIMA(0,1,1)(1,1,1)[4] has the lower AICc, so we're going with that one; it is also exactly the model that ARIMA() selected automatically.




#g

#(1 - Phi1 B^4)(1 - B)(1 - B^4)y_t = (1 + theta1 B)(1 + Theta1 B^4)e_t

Exercise 8

#a

Brazil_GDP <- read_excel("C:\\Users\\Zacha\\OneDrive\\Documents\\Brazil GDP.xls")

brazil.gdp <- Brazil_GDP %>%
  mutate(Date = yearquarter(Date)) %>%
  as_tsibble(index = Date) %>%
  mutate(GDP = Value)

#b


autoplot(brazil.gdp) + labs(title = "Quarterly Brazilian GDP")
## Plot variable not specified, automatically selected `.vars = Value`

brazil.gdp %>%
  features(GDP, unitroot_ndiffs)
## # A tibble: 1 x 1
##   ndiffs
##    <int>
## 1      1
brazil.gdp.compare <- brazil.gdp %>% 
  model(
    'Auto' = ARIMA(GDP),
    'Diffs Only' = ARIMA(GDP ~ pdq(0,2,0)),
    'Autoregression + Diff' = ARIMA(GDP ~ pdq(1,2,0)),
    "Moving Average + Diff" = ARIMA(GDP ~ pdq(0,2,1)),
    "All" = ARIMA(GDP ~ pdq(1,2,1))
        ) 


brazil.gdp.compare %>% 
  report()
## Warning in report.mdl_df(.): Model reporting is only supported for individual
## models, so a glance will be shown. To see the report for a specific model, use
## `select()` and `filter()` to identify a single model.
## # A tibble: 5 x 8
##   .model                sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>                  <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 Auto                    2.48   -138.  281.  282.  288. <cpl [0]> <cpl [4]>
## 2 Diffs Only              3.96   -153.  313.  313.  320. <cpl [4]> <cpl [4]>
## 3 Autoregression + Diff   3.34   -147.  301.  302.  310. <cpl [5]> <cpl [4]>
## 4 Moving Average + Diff   2.54   -138.  282.  282.  289. <cpl [0]> <cpl [5]>
## 5 All                     2.54   -137.  283.  283.  292. <cpl [1]> <cpl [5]>
#The auto ARIMA generated the lowest AICc, as is expected. It selected an ARIMA(0,1,0)(0,0,1)[4] w/ drift as the model.
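#Sketch: as the warning above suggests, select() a single column of the
#mable to see its full report, e.g. for the auto model:
brazil.gdp.compare %>%
  select(Auto) %>%
  report()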



#c

brazilian.models <- brazil.gdp %>%
  model('Auto' = ARIMA(GDP ~ pdq(0,1,0) + PDQ(0,0,1)))


brazilian.models %>%
  gg_tsresiduals()

augment(brazilian.models) %>%
  features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 x 3
##   .model lb_stat lb_pvalue
##   <chr>    <dbl>     <dbl>
## 1 Auto      5.49     0.857
#These residuals are actually consistent with white noise. There is no significant autocorrelation in the ACF, which is good to see, and the histogram looks relatively normal apart from one obvious outlier, which I infer was caused by the 2008 financial crisis. The Ljung-Box p-value of 0.857 is well above 0.05, so we fail to reject the null hypothesis that the residuals are white noise.


#d


brazilian.models %>%
  forecast(h = 4) %>%
  autoplot(brazil.gdp) +
  labs(title = "Brazilian GDP Forecasted for Four Years")

#e

brazil.gdp %>% 
  model(
    'Auto' = ETS(GDP),
    'Simple' = ETS(GDP ~ error("A") + trend("N") + season("N")),
    'Holt' = ETS(GDP ~ error("A") + trend("A") + season("N")),
    'Multiplicative Trend' = ETS(GDP ~ error("A") + trend("M") + season("N"))
    ) %>% 
  report(brazil.gdp)
## Warning in report.mdl_df(., brazil.gdp): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.
## # A tibble: 4 x 9
##   .model                 sigma2 log_lik   AIC  AICc   BIC   MSE  AMSE     MAE
##   <chr>                   <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1 Auto                 0.000155   -196.  401.  402.  413.  2.51  6.01 0.00906
## 2 Simple               3.40       -207.  420.  420.  427.  3.31  9.90 1.44   
## 3 Holt                 2.66       -197.  403.  404.  415.  2.52  6.02 1.18   
## 4 Multiplicative Trend 2.65       -196.  403.  404.  415.  2.51  6.04 1.19
#The auto ETS model generated the lowest AICc, as is expected, and it suggests an (M,A,N) model. We have been told to be cautious with multiplicative errors, and Holt's (A,A,N) model is nearly as good by AICc, but I will proceed with the auto-selected (M,A,N) model below.


#f


brazilian.man <- brazil.gdp %>%
  model('Auto' = ETS(GDP)) %>%
  report(brazil.gdp)
## Series: GDP 
## Model: ETS(M,A,N) 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.0001018537 
## 
##   Initial states:
##      l[0]      b[0]
##  99.71806 0.8686772
## 
##   sigma^2:  2e-04
## 
##      AIC     AICc      BIC 
## 401.1956 402.0652 412.7830
brazilian.man %>%
  gg_tsresiduals()

augment(brazilian.man) %>%
  features(.resid, ljung_box, lag = 10, dof = 0)
## # A tibble: 1 x 3
##   .model lb_stat lb_pvalue
##   <chr>    <dbl>     <dbl>
## 1 Auto      9.96     0.444
#The residuals look very similar to those from the ARIMA model, almost identical. No significant autocorrelation again, and the histogram looks relatively normal. The Ljung-Box p-value of 0.444 is above 0.05, so once again we fail to reject the null that the residuals are white noise.


#g

brazilian.man %>%
  forecast(h = 4) %>%
  autoplot(brazil.gdp) +
    labs(title = "Brazil GDP Forecasted (A,M,N) for Four Years")

#h

brazil.gdp %>%
  stretch_tsibble(.init = 10) %>%
  model(
    'Holt' = ETS(GDP ~ error("A") + trend("A") + season("N")),
    'Auto ARIMA' = ARIMA(GDP ~ pdq(1,1,0))
    ) %>%
  forecast(h = 1) %>%
  accuracy(brazil.gdp)
## Warning: The future dataset is incomplete, incomplete out-of-sample data will be treated as missing. 
## 1 observation is missing at 2014 Q4
## # A tibble: 2 x 10
##   .model     .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
##   <chr>      <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Auto ARIMA Test  0.274  1.62  1.27 0.215 0.950 0.306 0.316 0.305
## 2 Holt       Test  0.136  1.76  1.35 0.113 1.03  0.325 0.343 0.394
#The ARIMA model (specified here as pdq(1,1,0), despite the 'Auto ARIMA' label) produced the lower RMSE, so I prefer that one.