Data 624 HW 6

library(fpp3)

## -- Attaching packages -------------------------------------------- fpp3 0.4.0 --

## v tibble      3.1.4          v tsibble     1.0.1.9999
## v dplyr       1.0.7          v tsibbledata 0.3.0.9000
## v tidyr       1.1.3          v feasts      0.2.2     
## v lubridate   1.7.10         v fable       0.3.1     
## v ggplot2     3.3.5

## -- Conflicts ------------------------------------------------- fpp3_conflicts --
## x lubridate::date()    masks base::date()
## x dplyr::filter()      masks stats::filter()
## x tsibble::intersect() masks base::intersect()
## x tsibble::interval()  masks lubridate::interval()
## x dplyr::lag()         masks stats::lag()
## x tsibble::setdiff()   masks base::setdiff()
## x tsibble::union()     masks base::union()

library(forecast)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

library(glue)

## 
## Attaching package: 'glue'

## The following object is masked from 'package:dplyr':
## 
##     collapse

HW 6

9.1

Figure 9.32 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers.

Explain the differences among these figures. Do they all indicate that the data are white noise?

The figures indicate random noise, as best as I can tell they are within the dashed lines of the criital values

Why are the critical values at different distances from the mean of zero? Why are the autocorrelations different in each figure when they each refer to white noise?

The larger the sample the smaller the critical value becomes (as it is over the square root of the sample size). Apparent autocorrelation will frequently occur in small sample sets by chance, while larger sets increase the rareness of runs.

9.2

A classic example of a non-stationary series are stock prices. Plot the daily closing prices for Amazon stock (contained in gafa_stock), along with the ACF and PACF. Explain how each plot shows that the series is non-stationary and should be differenced.

amznStock <-gafa_stock%>% filter(Symbol=='AMZN')
amznStock %>% autoplot( vars(Close))

amznStock %>% gg_tsdisplay(plot="partial", y=Close)

## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

It is rather clear that the plot isn’t horizontal but has a strong trend.

amznStock %>% gg_tsdisplay(difference(Close), plot_type='partial')

## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

## Warning: Provided data has an irregular interval, results should be treated with
## caution. Computing ACF by observation.

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

Running a simple difference we see a much more horizontal plot, though the increasing trend causes the differences to be magnified versus the initial stock price. This suggests a box-cox transformation might be a good idea.

9.3

For the following series, find an appropriate Box-Cox transformation and order of differencing in order to obtain stationary data.

Turkish GDP from global_economy.

turkishGDP <- global_economy%>%filter(Country=="Turkey")
lambda<-BoxCox.lambda((turkishGDP%>% select(GDP))[[1]])
turkishGDP<-turkishGDP%>% mutate(boxGDP = box_cox(GDP,lambda=lambda))
turkishGDP%>% gg_tsdisplay(boxGDP,plot_type = "partial")+labs(title=glue("no difference lambda:{lambda}"))

turkishGDP <- global_economy%>%filter(Country=="Turkey")%>%mutate(difGDP=difference(GDP))
lambda<-BoxCox.lambda((turkishGDP%>% select(difGDP))[[1]])

## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.

turkishGDP<-turkishGDP%>% mutate(boxGDP = box_cox(difGDP,lambda))
turkishGDP%>% gg_tsdisplay(boxGDP,plot_type = "partial")+labs(title=glue("single difference lambda:{lambda}"))

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

turkishGDP <- global_economy%>%filter(Country=="Turkey")%>%mutate(difGDP=difference(GDP,differences=2))
lambda<-BoxCox.lambda((turkishGDP%>% select(difGDP))[[1]])

## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.

turkishGDP<-turkishGDP%>% mutate(boxGDP = box_cox(difGDP,lambda))
turkishGDP%>% gg_tsdisplay(boxGDP,plot_type = "partial")+labs(title=glue("double difference lambda:{lambda}"))

## Warning: Removed 2 row(s) containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_point).

A single difference with a box cox lambda of .122 should be decent

Accommodation takings in the state of Tasmania from aus_accommodation.

tasmaniaTakings <-aus_accommodation%>%filter(State=="Tasmania")
lambda<-BoxCox.lambda((tasmaniaTakings%>% as.tibble%>% select(Takings ))[[1]])

## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.

tasmaniaTakings<-tasmaniaTakings%>% mutate(boxTakings  = box_cox(Takings ,lambda=lambda))
tasmaniaTakings%>% gg_tsdisplay(boxTakings ,plot_type = "partial")+labs(title=glue("no difference lambda:{lambda}"))

tasmaniaTakings <-aus_accommodation%>%filter(State=="Tasmania")%>%mutate(difTakings=difference(Takings ))
lambda<-BoxCox.lambda((tasmaniaTakings%>% as.tibble%>% select(difTakings ))[[1]])

## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.

turkishGDP<-tasmaniaTakings %>% mutate(boxTakings  = box_cox(difTakings ,lambda))
turkishGDP%>% gg_tsdisplay(boxTakings ,plot_type = "partial")+labs(title=glue("single difference lambda:{lambda}"))

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

tasmaniaTakings <- aus_accommodation%>%filter(State=="Tasmania")%>%mutate(difTakings =difference(Takings ,differences=2))
lambda<-BoxCox.lambda((tasmaniaTakings %>% as.tibble%>% select(difTakings ))[[1]])

## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.

tasmaniaTakings<-tasmaniaTakings %>% mutate(boxTakings  = box_cox(difTakings ,lambda))
tasmaniaTakings%>% gg_tsdisplay(boxTakings ,plot_type = "partial")+labs(title=glue("double difference lambda:{lambda}"))

## Warning: Removed 2 row(s) containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_point).

A double difference seems preferable.

Monthly sales from souvenirs.

ourSouvenirs <-souvenirs
lambda<-BoxCox.lambda((ourSouvenirs%>% as.tibble%>% select(Sales ))[[1]])
ourSouvenirs<-ourSouvenirs%>% mutate(boxSales  = box_cox(Sales ,lambda=lambda))
ourSouvenirs%>% gg_tsdisplay(boxSales ,plot_type = "partial")+labs(title=glue("no difference lambda:{lambda}"))

ourSouvenirs <-souvenirs%>%mutate(difSales=difference(Sales ))
lambda<-BoxCox.lambda((ourSouvenirs%>% as.tibble%>% select(difSales ))[[1]])

## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.

ourSouvenirs<-ourSouvenirs %>% mutate(boxSales  = box_cox(difSales ,lambda))
ourSouvenirs%>% gg_tsdisplay(boxSales ,plot_type = "partial")+labs(title=glue("single difference lambda:{lambda}"))

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

ourSouvenirs <- ourSouvenirs%>%mutate(difSales =difference(Sales ,differences=2))
lambda<-BoxCox.lambda((ourSouvenirs %>% as.tibble%>% select(difSales ))[[1]])

## Warning in guerrero(x, lower, upper): Guerrero's method for selecting a Box-Cox
## parameter (lambda) is given for strictly positive data.

ourSouvenirs<-ourSouvenirs %>% mutate(boxSales  = box_cox(difSales ,lambda))
ourSouvenirs%>% gg_tsdisplay(boxSales ,plot_type = "partial")+labs(title=glue("double difference lambda:{lambda}"))

## Warning: Removed 2 row(s) containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_point).

A single difference seems correct, but again box cox doesn’t seem to entirely correct the seasonal trend differences.

9.5

For your retail data (from Exercise 8 in Section 2.10), find the appropriate order of differencing (after transformation if necessary) to obtain stationary data.

set.seed(12345678-4321)
myseries <- aus_retail %>%
  filter(`Series ID` == sample(aus_retail$`Series ID`,1))
lambda<-BoxCox.lambda((myseries%>% select(Turnover))[[1]])
myseries<-myseries%>% mutate(boxTurnover = box_cox(Turnover,lambda=lambda))%>%mutate(difTurnover =difference(boxTurnover ,differences=1))
myseries%>% gg_tsdisplay(difTurnover,plot_type = "partial") +labs(Title="Single difference")

## Warning: Removed 1 row(s) containing missing values (geom_path).

## Warning: Removed 1 rows containing missing values (geom_point).

myseries <- myseries %>% mutate(difTurnover =difference(boxTurnover ,differences=2))
myseries%>% gg_tsdisplay(difTurnover,plot_type = "partial") +labs(Title="Double difference")

## Warning: Removed 2 row(s) containing missing values (geom_path).

## Warning: Removed 2 rows containing missing values (geom_point).

Neither single or double difference with box cox can avoid all auto correlations, but a single difference seems to suffice to make it reasonably stationary, though there does seem to be an uncorrectable difference in seasonal patterns. ## 9.6

imulate and plot some data from simple ARIMA models.

Use the following R code to generate data from an AR(1) model with

ϕ 1 = 0.6 and
σ 2 = 1 . The process starts with
y 1 = 0 .

set.seed(6789)
y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)

Produce a time plot for the series. How does the plot change as you change
ϕ 1 ?

sim%>%autoplot()

## Plot variable not specified, automatically selected `.vars = y`

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 1*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
sim%>%autoplot()+labs(title='Theta=1')

## Plot variable not specified, automatically selected `.vars = y`

Write your own code to generate data from an MA(1) model with
θ 1 = 0.6 and
σ 2 = 1 .

sim%>%autoplot()

## Plot variable not specified, automatically selected `.vars = y`

y <- numeric(100)
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 1*y[i-1] + e[i]
sim <- tsibble(idx = seq_len(100), y = y, index = idx)
sim%>%autoplot()+labs(title='Theta=1')

## Plot variable not specified, automatically selected `.vars = y`

Produce a time plot for the series. How does the plot change as you change
θ 1 ?

Generate data from an ARMA(1,1) model with
ϕ 1 = 0.6 ,
θ 1 = 0.6 and
σ 2 = 1 .

Generate data from an AR(2) model with
ϕ 1 = − 0.8 ,
ϕ 2 = 0.3 and
σ 2 = 1 . (Note that these parameters will give a non-stationary series.)

Graph the latter two series and compare them.

9.7

Consider aus_airpassengers, the total number of passengers (in millions) from Australian air carriers for the period 1970-2011.

Use ARIMA() to find an appropriate ARIMA model. What model was selected. Check that the residuals look like white noise. Plot forecasts for the next 10 periods.

ausAir<-aus_airpassengers%>%model(ARIMA(Passengers))
report(ausAir)

## Series: Passengers 
## Model: ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.8963
## s.e.   0.0594
## 
## sigma^2 estimated as 4.308:  log likelihood=-97.02
## AIC=198.04   AICc=198.32   BIC=201.65

ausAir[[1]][[1]]$fit%>%checkresiduals()

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

ausAir%>%forecast(h=10)%>% autoplot(aus_airpassengers)

Looks good

Write the model in terms of the backshift operator.

\[ y'_{t} = y_{t} - y_{t-1} = y_t - By_{t} = (1 - B)y_{t}\: \]

Plot forecasts from an ARIMA(0,1,0) model with drift and compare these to part a.

ausAir<-aus_airpassengers%>%model(arimaDefault = ARIMA(Passengers), arima010 = ARIMA(Passengers ~ 1 +pdq(0,1,0)) )
report(ausAir)

## Warning in report.mdl_df(ausAir): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.

## # A tibble: 2 x 8
##   .model       sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>         <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 arimaDefault   4.31   -97.0  198.  198.  202. <cpl [0]> <cpl [1]>
## 2 arima010       4.27   -98.2  200.  201.  204. <cpl [0]> <cpl [0]>

ausAir[[2]][[1]]$fit%>%checkresiduals()

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

ausAir%>%forecast(h=10)%>% autoplot(aus_airpassengers)

d) Plot forecasts from an ARIMA(2,1,2) model with drift and compare these to parts a and c. Remove the constant and see what happens.

ausAir<-aus_airpassengers%>%model(arimaDefault = ARIMA(Passengers), arima010 = ARIMA(Passengers ~ 1 +pdq(0,1,0)), arima212= ARIMA(Passengers ~ 1 +pdq(2,1,2)) )
report(ausAir )

## Warning in report.mdl_df(ausAir): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.

## # A tibble: 3 x 8
##   .model       sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>         <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 arimaDefault   4.31   -97.0  198.  198.  202. <cpl [0]> <cpl [1]>
## 2 arima010       4.27   -98.2  200.  201.  204. <cpl [0]> <cpl [0]>
## 3 arima212       4.03   -96.2  204.  207.  215. <cpl [2]> <cpl [2]>

ausAir[[3]][[1]]$fit%>%checkresiduals()

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

ausAir%>%forecast(h=10)%>% autoplot(aus_airpassengers)

ausAir<-aus_airpassengers%>%model(arimaDefault = ARIMA(Passengers), arima212= ARIMA(Passengers ~ 1 +pdq(2,1,2)), arima212noConstant= ARIMA(Passengers ~ -1 ) )
report(ausAir )

## Warning in report.mdl_df(ausAir): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.

## # A tibble: 3 x 8
##   .model             sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>               <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 arimaDefault         4.31   -97.0  198.  198.  202. <cpl [0]> <cpl [1]>
## 2 arima212             4.03   -96.2  204.  207.  215. <cpl [2]> <cpl [2]>
## 3 arima212noConstant   4.31   -97.0  198.  198.  202. <cpl [0]> <cpl [1]>

ausAir[[3]][[1]]$fit%>%checkresiduals()

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

ausAir%>%forecast(h=10)%>% autoplot(aus_airpassengers)

e) Plot forecasts from an ARIMA(0,2,1) model with a constant. What happens?

```r
ausAir<-aus_airpassengers%>%model(arimaDefault = ARIMA(Passengers), arima010 = ARIMA(Passengers ~ 1 +pdq(0,1,0)) )
report(ausAir)

## Warning in report.mdl_df(ausAir): Model reporting is only supported for
## individual models, so a glance will be shown. To see the report for a specific
## model, use `select()` and `filter()` to identify a single model.

## # A tibble: 2 x 8
##   .model       sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##   <chr>         <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
## 1 arimaDefault   4.31   -97.0  198.  198.  202. <cpl [0]> <cpl [1]>
## 2 arima010       4.27   -98.2  200.  201.  204. <cpl [0]> <cpl [0]>

ausAir[[2]][[1]]$fit%>%checkresiduals()

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

ausAir%>%forecast(h=10)%>% autoplot(aus_airpassengers)

## 9.8

For the United States GDP series (from global_economy):

```r
usaGDP <- global_economy%>%filter(Country=="United States")

if necessary, find a suitable Box-Cox transformation for the data;

lambda<-BoxCox.lambda((usaGDP%>% select(GDP))[[1]])
usaGDP<-usaGDP%>% mutate(boxGDP = box_cox(GDP,lambda=lambda))
usaGDP%>% gg_tsdisplay(boxGDP,plot_type = "partial")+labs(title=glue("USA GDP lambda:{lambda}"))

b) fit a suitable ARIMA model to the transformed data using ARIMA();

usaGDPmodel<-usaGDP%>% model(ARIMA(boxGDP))
report(usaGDPmodel)

## Series: boxGDP 
## Model: ARIMA(1,1,0) w/ drift 
## 
## Coefficients:
##          ar1  constant
##       0.4586  118.2764
## s.e.  0.1198    9.5123
## 
## sigma^2 estimated as 5488:  log likelihood=-325.37
## AIC=656.74   AICc=657.19   BIC=662.87

try some other plausible models by experimenting with the orders chosen;

usaGDPmodel<-usaGDP%>% model(ARIMA(boxGDP~0+pdq(1,1,1)+PDQ(0,1,1)))
report(usaGDPmodel)

## Series: boxGDP 
## Model: ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.9905  -0.6583
## s.e.  0.0126   0.1460
## 
## sigma^2 estimated as 6094:  log likelihood=-329.45
## AIC=664.9   AICc=665.35   BIC=671.02

usaGDPmodel<-usaGDP%>% model(ARIMA(boxGDP~1+pdq(1,0,1)+PDQ(0,0,1)))
report(usaGDPmodel)

## Series: boxGDP 
## Model: ARIMA(1,0,1) w/ mean 
## 
## Coefficients:
##       ar1     ma1  constant
##         1  0.7673    0.0192
## s.e.    0  0.0668    0.0342
## 
## sigma^2 estimated as 23800:  log likelihood=-373.96
## AIC=755.92   AICc=756.67   BIC=764.16

choose what you think is the best model and check the residual diagnostics;

usaGDPmodel<-usaGDP%>% model(ARIMA(boxGDP))
usaGDPmodel[[2]][[1]]$fit %>% checkresiduals()

## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

e) produce forecasts of your fitted model. Do the forecasts look reasonable?

fit<-usaGDPmodel%>%forecast(h=10)
fit%>%autoplot(usaGDP)

f) compare the results with what you would obtain using ETS() (with no transformation).

usaGDPmodel<-usaGDP%>% model( ets=ETS(GDP))
fit<-usaGDPmodel%>%forecast(h=10)
fit%>%autoplot(usaGDP)

Data 624 HW 6

Scott Reed

10/18/2021

HW 6

9.1

9.2

9.3

9.5

9.7