Load packages

library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ----------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(tidymodels)
## -- Attaching packages ------------------------------------------------------------------- tidymodels 0.1.0 --
## v broom     0.5.6      v rsample   0.0.6 
## v dials     0.0.6      v tune      0.1.0 
## v infer     0.5.1      v workflows 0.1.1 
## v parsnip   0.1.1      v yardstick 0.0.6 
## v recipes   0.1.12
## -- Conflicts ---------------------------------------------------------------------- tidymodels_conflicts() --
## x scales::discard() masks purrr::discard()
## x dplyr::filter()   masks stats::filter()
## x recipes::fixed()  masks stringr::fixed()
## x dplyr::lag()      masks stats::lag()
## x dials::margin()   masks ggplot2::margin()
## x yardstick::spec() masks readr::spec()
## x recipes::step()   masks stats::step()
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(skimr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(vip)        
## 
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
## 
##     vi
library(fastshap)   
## 
## Attaching package: 'fastshap'
## The following object is masked from 'package:vip':
## 
##     gen_friedman
## The following object is masked from 'package:dplyr':
## 
##     explain
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(ISLR)
library(tree)
## Registered S3 method overwritten by 'tree':
##   method     from
##   print.tree cli
library(ggplot2)
library(dplyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(imputeTS)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:yardstick':
## 
##     accuracy
library(urca)
library(pracma)
## 
## Attaching package: 'pracma'
## The following object is masked from 'package:purrr':
## 
##     cross
library(astsa)
## 
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
## 
##     gas
library(fpp2)
## -- Attaching packages --------------------------------------------------------------------------- fpp2 2.4 --
## v fma       2.4     v expsmooth 2.3
## -- Conflicts ------------------------------------------------------------------------------ fpp2_conflicts --
## x forecast::accuracy() masks yardstick::accuracy()
## x pracma::cross()      masks purrr::cross()
## x scales::discard()    masks purrr::discard()
## x dials::margin()      masks ggplot2::margin()
## 
## Attaching package: 'fpp2'
## The following object is masked from 'package:astsa':
## 
##     oil

Read data and check for missing values

fed <- read.csv("C:/Wake Forest/BAN7070/Week 10/Week 10/IBL.csv")
summary(fed)
##     LN_LOANS         DATE               LOANS       
##  Min.   :3.288   Length:340         Min.   : 26.80  
##  1st Qu.:4.329   Class :character   1st Qu.: 75.85  
##  Median :5.039   Mode  :character   Median :154.35  
##  Mean   :4.759                      Mean   :136.39  
##  3rd Qu.:5.207                      3rd Qu.:182.57  
##  Max.   :5.677                      Max.   :292.10
head(fed)
##   LN_LOANS   DATE LOANS
## 1 3.288402 Jan-72  26.8
## 2 3.314186 Feb-72  27.5
## 3 3.321432 Mar-72  27.7
## 4 3.346389 Apr-72  28.4
## 5 3.411148 May-72  30.3
## 6 3.424263 Jun-72  30.7
# summary() reports no NA counts, confirming there are no missing values.
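
As an explicit check (a minimal sketch using the fed data frame read above), count the missing values per column:

# Sketch: explicit per-column NA count; all three columns should return 0
colSums(is.na(fed))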

Create the time series, plot it, and run a Ljung-Box test for white noise

fed_ts <- ts(fed$LN_LOANS, start=c(1972), frequency = 12)
plot(fed_ts)

Box.test(fed_ts, lag=100, fitdf=0, type="Lj")
## 
##  Box-Ljung test
## 
## data:  fed_ts
## X-squared = 11906, df = 100, p-value < 2.2e-16
# H0: the time series is white noise
# Ha: the time series is not white noise
# The p-value is well below .05, so we reject the null hypothesis that the series is white noise and conclude that the series is not white noise.
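
For reference, the Ljung-Box statistic can be reproduced by hand from the sample autocorrelations; this minimal sketch should agree with the Box.test() call above (same series, same lag of 100):

# Sketch: Ljung-Box Q = n*(n+2) * sum( rho_k^2 / (n-k) ), k = 1..K
n   <- length(fed_ts)
K   <- 100
rho <- acf(fed_ts, lag.max = K, plot = FALSE)$acf[-1]  # sample ACF, dropping lag 0
Q   <- n * (n + 2) * sum(rho^2 / (n - seq_len(K)))
pchisq(Q, df = K, lower.tail = FALSE)                  # p-value against chi-squared(K)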

ADF test for stationarity

Use the single-mean (drift) version of the test

fed_df <- ur.df(fed_ts, type = "drift")
summary(fed_df)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.143618 -0.018268 -0.001085  0.019373  0.081568 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.037805   0.012539   3.015  0.00277 **
## z.lag.1     -0.006399   0.002604  -2.457  0.01451 * 
## z.diff.lag  -0.050940   0.054578  -0.933  0.35131   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02914 on 335 degrees of freedom
## Multiple R-squared:  0.01914,    Adjusted R-squared:  0.01328 
## F-statistic: 3.268 on 2 and 335 DF,  p-value: 0.03928
## 
## 
## Value of test-statistic is: -2.4573 12.9392 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.44 -2.87 -2.57
## phi1  6.47  4.61  3.79
# H0: the series has a unit root (non-stationary) and needs a first difference
# Ha: the series is stationary
# The tau2 test statistic of -2.46 is not more negative than the 5% critical value of -2.87, so we fail to reject the null hypothesis: the series appears non-stationary and should be differenced. (The 0.0145 p-value on z.lag.1 comes from the ordinary regression t-table, which is not the correct reference distribution for a unit-root test; the ur.df critical values above are.)
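
As a cross-check on the differencing order, a quick sketch using forecast::ndiffs(), which applies a unit-root test to suggest how many differences are needed (the answer depends on the test and defaults used):

# Sketch: let the forecast package suggest the differencing order
ndiffs(fed_ts)    # number of first differences suggested
nsdiffs(fed_ts)   # number of seasonal differences suggested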

Plot ACF and PACF

ggAcf(fed_ts)

ggPacf(fed_ts)

# The ACF decays very slowly.
# The PACF has a single significant, positive spike at lag 1.
# This pattern (slow ACF decay, one PACF spike) points to an AR(1)-type process with a root near one.

Fit an ARIMA model (start with AR(1))

# With slow positive decay in the ACF and a single PACF spike at lag 1, try ARIMA(1,0,0) on the level as a first pass.
fed_mod <- sarima(fed_ts, 1, 0, 0)
## initial  value -0.490069 
## iter   2 value -3.489287
## iter   3 value -3.508375
## iter   4 value -3.511802
## iter   5 value -3.512105
## iter   6 value -3.512111
## iter   7 value -3.512424
## iter   8 value -3.512432
## iter   9 value -3.519273
## iter  10 value -3.519351
## iter  11 value -3.519714
## iter  12 value -3.533535
## iter  13 value -3.535538
## iter  14 value -3.538925
## iter  15 value -3.538925
## iter  16 value -3.538932
## iter  17 value -3.539102
## iter  18 value -3.539294
## iter  19 value -3.539651
## iter  20 value -3.539739
## iter  21 value -3.539772
## iter  22 value -3.539773
## iter  23 value -3.539846
## iter  24 value -3.539908
## iter  25 value -3.539927
## iter  26 value -3.539931
## iter  27 value -3.539932
## iter  28 value -3.539944
## iter  29 value -3.539955
## iter  30 value -3.539961
## iter  31 value -3.539962
## iter  31 value -3.539962
## iter  31 value -3.539962
## final  value -3.539962 
## converged
## initial  value -3.406661 
## iter   2 value -3.444560
## iter   3 value -3.469070
## iter   4 value -3.480518
## iter   5 value -3.486596
## iter   6 value -3.489458
## iter   7 value -3.489509
## iter   8 value -3.490916
## iter   9 value -3.491114
## iter  10 value -3.491163
## iter  11 value -3.491166
## iter  12 value -3.491166
## iter  13 value -3.491166
## iter  14 value -3.491166
## iter  15 value -3.491166
## iter  16 value -3.491167
## iter  17 value -3.491167
## iter  18 value -3.491167
## iter  19 value -3.491167
## iter  20 value -3.491167
## iter  21 value -3.491167
## iter  22 value -3.491168
## iter  23 value -3.491168
## iter  24 value -3.491168
## iter  25 value -3.491168
## iter  26 value -3.491168
## iter  27 value -3.491169
## iter  28 value -3.491169
## iter  29 value -3.491169
## iter  30 value -3.491169
## iter  31 value -3.491169
## iter  32 value -3.491169
## iter  33 value -3.491170
## iter  34 value -3.491170
## iter  35 value -3.491170
## iter  36 value -3.491170
## iter  37 value -3.491170
## iter  38 value -3.491171
## iter  39 value -3.491171
## iter  40 value -3.491171
## iter  41 value -3.491171
## iter  42 value -3.491171
## iter  43 value -3.491172
## iter  44 value -3.491172
## iter  45 value -3.491172
## iter  46 value -3.491172
## iter  47 value -3.491172
## iter  48 value -3.491172
## iter  49 value -3.491173
## iter  50 value -3.491173
## iter  51 value -3.491173
## iter  52 value -3.491173
## iter  53 value -3.491173
## iter  54 value -3.491174
## iter  55 value -3.491174
## iter  56 value -3.491174
## iter  57 value -3.491174
## iter  58 value -3.491174
## iter  59 value -3.491174
## iter  60 value -3.491175
## iter  61 value -3.491175
## iter  62 value -3.491175
## iter  63 value -3.491175
## iter  64 value -3.491175
## iter  65 value -3.491176
## iter  66 value -3.491176
## iter  67 value -3.491176
## iter  68 value -3.491176
## iter  69 value -3.491176
## iter  70 value -3.491176
## iter  71 value -3.491177
## iter  72 value -3.491177
## iter  73 value -3.491177
## iter  74 value -3.491177
## iter  75 value -3.491177
## iter  76 value -3.491178
## iter  77 value -3.491178
## iter  78 value -3.491178
## iter  79 value -3.491178
## iter  80 value -3.491178
## iter  81 value -3.491178
## iter  82 value -3.491179
## iter  83 value -3.491179
## iter  84 value -3.491179
## iter  85 value -3.491179
## iter  86 value -3.491179
## iter  87 value -3.491180
## iter  88 value -3.491180
## iter  89 value -3.491180
## iter  90 value -3.491180
## iter  91 value -3.491180
## iter  92 value -3.491181
## iter  93 value -3.491181
## iter  94 value -3.491181
## iter  95 value -3.491181
## iter  96 value -3.491181
## iter  97 value -3.491181
## iter  98 value -3.491182
## iter  99 value -3.491182
## iter 100 value -3.491182
## final  value -3.491182 
## stopped after 100 iterations
## Warning in stats::arima(xdata, order = c(p, d, q), seasonal = list(order =
## c(P, : possible convergence problem: optim gave code = 1
## Warning in sqrt(diag(fitit$var.coef)): NaNs produced

## Warning in sqrt(diag(fitit$var.coef)): NaNs produced

summary(fed_mod)
##                    Length Class  Mode   
## fit                14     Arima  list   
## degrees_of_freedom  1     -none- numeric
## ttable              8     -none- numeric
## AIC                 1     -none- numeric
## AICc                1     -none- numeric
## BIC                 1     -none- numeric
fed_mod
## $fit
## 
## Call:
## stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, 
##     Q), period = S), xreg = xmean, include.mean = FALSE, transform.pars = trans, 
##     fixed = fixed, optim.control = list(trace = trc, REPORT = 1, reltol = tol))
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##          ar1   xmean
##       0.9998  5.7083
## s.e.     NaN     NaN
## 
## sigma^2 estimated as 0.0009063:  log likelihood = 704.56,  aic = -1403.13
## 
## $degrees_of_freedom
## [1] 338
## 
## $ttable
##       Estimate  SE t.value p.value
## ar1     0.9998 NaN     NaN     NaN
## xmean   5.7083 NaN     NaN     NaN
## 
## $AIC
## [1] -4.12684
## 
## $AICc
## [1] -4.126735
## 
## $BIC
## [1] -4.093055
# The Ljung-Box p-values in the sarima() diagnostic plot fall below .05, so we reject the null hypothesis: the residuals of this model are not white noise.
# In addition, the ar1 estimate of 0.9998 is essentially a unit root and the standard errors are NaN, which reinforces the ADF conclusion that the series should be differenced rather than modeled in levels.
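
To test the residuals numerically rather than reading the diagnostic plot, a sketch using the underlying Arima object that sarima() returns in fed_mod$fit:

# Sketch: Ljung-Box test on the AR(1) residuals (fitdf = 1 for the single AR coefficient)
ar1_resid <- residuals(fed_mod$fit)
Box.test(ar1_resid, lag = 24, fitdf = 1, type = "Ljung-Box")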

# Try auto.arima()

auto.arima(fed_ts)
## Series: fed_ts 
## ARIMA(2,2,2)(1,0,0)[12] 
## 
## Coefficients:
##           ar1      ar2      ma1      ma2     sar1
##       -0.5358  -0.2047  -0.4881  -0.4322  -0.0243
## s.e.   0.1719   0.0647   0.1692   0.1798   0.0573
## 
## sigma^2 estimated as 0.0008428:  log likelihood=717.9
## AIC=-1423.8   AICc=-1423.55   BIC=-1400.87
# auto.arima() selects ARIMA(2,2,2)(1,0,0)[12]; the seasonal AR(1) coefficient (-0.024, s.e. 0.057) is not significant, so fit a non-seasonal ARIMA(2,2,2).
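
Before committing to ARIMA(2,2,2), the two candidates can be compared on a common information-criterion scale. Note that the AIC/AICc/BIC values returned by sarima() above appear to be scaled by the sample size (-1403.13 / 340 is approximately -4.13), so they are not directly comparable to auto.arima()'s output; a sketch that refits both with forecast::Arima():

# Sketch: refit both candidates with Arima() so the AICc values are comparable
fit_ar1 <- Arima(fed_ts, order = c(1, 0, 0))
fit_222 <- Arima(fed_ts, order = c(2, 2, 2))
c(AR1 = fit_ar1$aicc, ARIMA222 = fit_222$aicc)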

fed.arima <- arima(fed_ts, order = c(2,2,2))
# a table of parameter estimates
summary(fed.arima)
## 
## Call:
## arima(x = fed_ts, order = c(2, 2, 2))
## 
## Coefficients:
##           ar1      ar2      ma1      ma2
##       -0.5431  -0.2045  -0.4822  -0.4427
## s.e.   0.1686   0.0657   0.1659   0.1778
## 
## sigma^2 estimated as 0.0008306:  log likelihood = 717.81,  aic = -1425.62
## 
## Training set error measures:
##                         ME       RMSE       MAE         MPE      MAPE      MASE
## Training set -0.0006856952 0.02873663 0.0223985 -0.02094456 0.4818343 0.9646525
##                     ACF1
## Training set 0.003051145
# residual diagnostics (plot and Ljung-Box test) for the chosen model
checkresiduals(fed.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,2,2)
## Q* = 20.949, df = 20, p-value = 0.4001
## 
## Model df: 4.   Total lags used: 24
# The Ljung-Box p-value of 0.4001 is well above .05, so we fail to reject the null hypothesis: the residuals are consistent with white noise and the ARIMA(2,2,2) model fits adequately.


accuracy(fed.arima)
##                         ME       RMSE       MAE         MPE      MAPE      MASE
## Training set -0.0006856952 0.02873663 0.0223985 -0.02094456 0.4818343 0.9646525
##                     ACF1
## Training set 0.003051145
# Training-set RMSE = 0.0287 and MAPE = 0.48% (both computed on the log scale).
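
The error measures above are in-sample. A simple out-of-sample check is sketched below, assuming we hold out the last 24 months as a test set:

# Sketch: refit on a training window and score forecasts against the held-out 24 months
n_obs  <- length(fed_ts)
train  <- window(fed_ts, end = time(fed_ts)[n_obs - 24])
test   <- window(fed_ts, start = time(fed_ts)[n_obs - 23])
fit_tr <- Arima(train, order = c(2, 2, 2))
fc_tr  <- forecast(fit_tr, h = 24)
accuracy(fc_tr, test)   # reports training-set and test-set error measures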


# forecast plot
fed.arima %>%
  forecast() %>%
  autoplot()

# With the parameter estimates in hand and residuals that look like white noise, we can produce forecasts with prediction intervals.
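
Since LN_LOANS is the natural log of LOANS (exp(3.2884) is roughly 26.8, matching the first row of the data), the forecasts can be put back on the original loan scale; a minimal sketch:

# Sketch: 12-month-ahead forecast on the log scale, back-transformed with exp()
fc <- forecast(fed.arima, h = 12)
loans_fc <- data.frame(
  point = exp(fc$mean),           # point forecast on the loan scale
  lower = exp(fc$lower[, "95%"]), # lower 95% interval
  upper = exp(fc$upper[, "95%"])  # upper 95% interval
)
head(loans_fc)

Strictly speaking, exponentiating the log-scale point forecast gives a median-type forecast on the loan scale rather than the mean, which is usually acceptable for reporting.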