load packages
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ----------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(tidymodels)
## -- Attaching packages ------------------------------------------------------------------- tidymodels 0.1.0 --
## v broom 0.5.6 v rsample 0.0.6
## v dials 0.0.6 v tune 0.1.0
## v infer 0.5.1 v workflows 0.1.1
## v parsnip 0.1.1 v yardstick 0.0.6
## v recipes 0.1.12
## -- Conflicts ---------------------------------------------------------------------- tidymodels_conflicts() --
## x scales::discard() masks purrr::discard()
## x dplyr::filter() masks stats::filter()
## x recipes::fixed() masks stringr::fixed()
## x dplyr::lag() masks stats::lag()
## x dials::margin() masks ggplot2::margin()
## x yardstick::spec() masks readr::spec()
## x recipes::step() masks stats::step()
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(skimr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(vip)
##
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
##
## vi
library(fastshap)
##
## Attaching package: 'fastshap'
## The following object is masked from 'package:vip':
##
## gen_friedman
## The following object is masked from 'package:dplyr':
##
## explain
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(ISLR)
library(tree)
## Registered S3 method overwritten by 'tree':
## method from
## print.tree cli
library(ggplot2)
library(dplyr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(imputeTS)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(forecast)
##
## Attaching package: 'forecast'
## The following object is masked from 'package:yardstick':
##
## accuracy
library(urca)
library(pracma)
##
## Attaching package: 'pracma'
## The following object is masked from 'package:purrr':
##
## cross
library(astsa)
##
## Attaching package: 'astsa'
## The following object is masked from 'package:forecast':
##
## gas
library(fpp2)
## -- Attaching packages --------------------------------------------------------------------------- fpp2 2.4 --
## v fma 2.4 v expsmooth 2.3
## -- Conflicts ------------------------------------------------------------------------------ fpp2_conflicts --
## x forecast::accuracy() masks yardstick::accuracy()
## x pracma::cross() masks purrr::cross()
## x scales::discard() masks purrr::discard()
## x dials::margin() masks ggplot2::margin()
##
## Attaching package: 'fpp2'
## The following object is masked from 'package:astsa':
##
## oil
Read data and check for missing values
fed <- read.csv("C:/Wake Forest/BAN7070/Week 10/Week 10/IBL.csv")
summary(fed)
## LN_LOANS DATE LOANS
## Min. :3.288 Length:340 Min. : 26.80
## 1st Qu.:4.329 Class :character 1st Qu.: 75.85
## Median :5.039 Mode :character Median :154.35
## Mean :4.759 Mean :136.39
## 3rd Qu.:5.207 3rd Qu.:182.57
## Max. :5.677 Max. :292.10
head(fed)
## LN_LOANS DATE LOANS
## 1 3.288402 Jan-72 26.8
## 2 3.314186 Feb-72 27.5
## 3 3.321432 Mar-72 27.7
## 4 3.346389 Apr-72 28.4
## 5 3.411148 May-72 30.3
## 6 3.424263 Jun-72 30.7
# confirmed no NA's.
create time series, plot, and Ljung-Box for white noise
fed_ts <- ts(fed$LN_LOANS, start=c(1972), frequency = 12)
plot(fed_ts)

Box.test(fed_ts, lag=100, fitdf=0, type="Lj")
##
## Box-Ljung test
##
## data: fed_ts
## X-squared = 11906, df = 100, p-value < 2.2e-16
# H0 = The time series is white Noise
# Ha = The time series is not White Noise
# with a small p-value of less than .05 therefore we reject the null hypothesis (that the time series is white noise), and we conclude that the time series is not white noise.
ADF test for stationarity
Use the Single Mean Version of the Test
fed_df <- ur.df(fed_ts, type = "drift")
summary(fed_df)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.143618 -0.018268 -0.001085 0.019373 0.081568
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.037805 0.012539 3.015 0.00277 **
## z.lag.1 -0.006399 0.002604 -2.457 0.01451 *
## z.diff.lag -0.050940 0.054578 -0.933 0.35131
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02914 on 335 degrees of freedom
## Multiple R-squared: 0.01914, Adjusted R-squared: 0.01328
## F-statistic: 3.268 on 2 and 335 DF, p-value: 0.03928
##
##
## Value of test-statistic is: -2.4573 12.9392
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.44 -2.87 -2.57
## phi1 6.47 4.61 3.79
# Ho = series is non-stationary and needs first difference
# Ha = series is stationary
# small p-value of 0.01451, so we reject the null hypothesis and conclude that this data is stationary and does not need a first difference.
Plot ACF and PACF
ggAcf(fed_ts)

ggPacf(fed_ts)

# Very slow decay of the ACF.
# the PACF has a significant spike at lag 1 that is positive.
# WIth a positive terms on the ACF and only one spike on the PACF, I will try a ARIMA(1,0,0)
fit an ARIMA model (start with AR(1))
# WIth a positive terms on the ACF and only one spike on the PACF, I will try a ARIMA(1,0,0)
fed_mod <- sarima(fed_ts, 1, 0, 0)
## initial value -0.490069
## iter 2 value -3.489287
## iter 3 value -3.508375
## iter 4 value -3.511802
## iter 5 value -3.512105
## iter 6 value -3.512111
## iter 7 value -3.512424
## iter 8 value -3.512432
## iter 9 value -3.519273
## iter 10 value -3.519351
## iter 11 value -3.519714
## iter 12 value -3.533535
## iter 13 value -3.535538
## iter 14 value -3.538925
## iter 15 value -3.538925
## iter 16 value -3.538932
## iter 17 value -3.539102
## iter 18 value -3.539294
## iter 19 value -3.539651
## iter 20 value -3.539739
## iter 21 value -3.539772
## iter 22 value -3.539773
## iter 23 value -3.539846
## iter 24 value -3.539908
## iter 25 value -3.539927
## iter 26 value -3.539931
## iter 27 value -3.539932
## iter 28 value -3.539944
## iter 29 value -3.539955
## iter 30 value -3.539961
## iter 31 value -3.539962
## iter 31 value -3.539962
## iter 31 value -3.539962
## final value -3.539962
## converged
## initial value -3.406661
## iter 2 value -3.444560
## iter 3 value -3.469070
## iter 4 value -3.480518
## iter 5 value -3.486596
## iter 6 value -3.489458
## iter 7 value -3.489509
## iter 8 value -3.490916
## iter 9 value -3.491114
## iter 10 value -3.491163
## iter 11 value -3.491166
## iter 12 value -3.491166
## iter 13 value -3.491166
## iter 14 value -3.491166
## iter 15 value -3.491166
## iter 16 value -3.491167
## iter 17 value -3.491167
## iter 18 value -3.491167
## iter 19 value -3.491167
## iter 20 value -3.491167
## iter 21 value -3.491167
## iter 22 value -3.491168
## iter 23 value -3.491168
## iter 24 value -3.491168
## iter 25 value -3.491168
## iter 26 value -3.491168
## iter 27 value -3.491169
## iter 28 value -3.491169
## iter 29 value -3.491169
## iter 30 value -3.491169
## iter 31 value -3.491169
## iter 32 value -3.491169
## iter 33 value -3.491170
## iter 34 value -3.491170
## iter 35 value -3.491170
## iter 36 value -3.491170
## iter 37 value -3.491170
## iter 38 value -3.491171
## iter 39 value -3.491171
## iter 40 value -3.491171
## iter 41 value -3.491171
## iter 42 value -3.491171
## iter 43 value -3.491172
## iter 44 value -3.491172
## iter 45 value -3.491172
## iter 46 value -3.491172
## iter 47 value -3.491172
## iter 48 value -3.491172
## iter 49 value -3.491173
## iter 50 value -3.491173
## iter 51 value -3.491173
## iter 52 value -3.491173
## iter 53 value -3.491173
## iter 54 value -3.491174
## iter 55 value -3.491174
## iter 56 value -3.491174
## iter 57 value -3.491174
## iter 58 value -3.491174
## iter 59 value -3.491174
## iter 60 value -3.491175
## iter 61 value -3.491175
## iter 62 value -3.491175
## iter 63 value -3.491175
## iter 64 value -3.491175
## iter 65 value -3.491176
## iter 66 value -3.491176
## iter 67 value -3.491176
## iter 68 value -3.491176
## iter 69 value -3.491176
## iter 70 value -3.491176
## iter 71 value -3.491177
## iter 72 value -3.491177
## iter 73 value -3.491177
## iter 74 value -3.491177
## iter 75 value -3.491177
## iter 76 value -3.491178
## iter 77 value -3.491178
## iter 78 value -3.491178
## iter 79 value -3.491178
## iter 80 value -3.491178
## iter 81 value -3.491178
## iter 82 value -3.491179
## iter 83 value -3.491179
## iter 84 value -3.491179
## iter 85 value -3.491179
## iter 86 value -3.491179
## iter 87 value -3.491180
## iter 88 value -3.491180
## iter 89 value -3.491180
## iter 90 value -3.491180
## iter 91 value -3.491180
## iter 92 value -3.491181
## iter 93 value -3.491181
## iter 94 value -3.491181
## iter 95 value -3.491181
## iter 96 value -3.491181
## iter 97 value -3.491181
## iter 98 value -3.491182
## iter 99 value -3.491182
## iter 100 value -3.491182
## final value -3.491182
## stopped after 100 iterations
## Warning in stats::arima(xdata, order = c(p, d, q), seasonal = list(order =
## c(P, : possible convergence problem: optim gave code = 1
## Warning in sqrt(diag(fitit$var.coef)): NaNs produced
## Warning in sqrt(diag(fitit$var.coef)): NaNs produced

summary(fed_mod)
## Length Class Mode
## fit 14 Arima list
## degrees_of_freedom 1 -none- numeric
## ttable 8 -none- numeric
## AIC 1 -none- numeric
## AICc 1 -none- numeric
## BIC 1 -none- numeric
fed_mod
## $fit
##
## Call:
## stats::arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D,
## Q), period = S), xreg = xmean, include.mean = FALSE, transform.pars = trans,
## fixed = fixed, optim.control = list(trace = trc, REPORT = 1, reltol = tol))
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 xmean
## 0.9998 5.7083
## s.e. NaN NaN
##
## sigma^2 estimated as 0.0009063: log likelihood = 704.56, aic = -1403.13
##
## $degrees_of_freedom
## [1] 338
##
## $ttable
## Estimate SE t.value p.value
## ar1 0.9998 NaN NaN NaN
## xmean 5.7083 NaN NaN NaN
##
## $AIC
## [1] -4.12684
##
## $AICc
## [1] -4.126735
##
## $BIC
## [1] -4.093055
# the residuals from the Ljung Box test indicate p < 0.05 which means they are not white noise since we reject the null hypothesis
# Try auto arima
auto.arima(fed_ts)
## Series: fed_ts
## ARIMA(2,2,2)(1,0,0)[12]
##
## Coefficients:
## ar1 ar2 ma1 ma2 sar1
## -0.5358 -0.2047 -0.4881 -0.4322 -0.0243
## s.e. 0.1719 0.0647 0.1692 0.1798 0.0573
##
## sigma^2 estimated as 0.0008428: log likelihood=717.9
## AIC=-1423.8 AICc=-1423.55 BIC=-1400.87
# auto arima suggests (2,2,2)
fed.arima <- arima(fed_ts, order = c(2,2,2))
# a table of parameter estimates
summary(fed.arima)
##
## Call:
## arima(x = fed_ts, order = c(2, 2, 2))
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -0.5431 -0.2045 -0.4822 -0.4427
## s.e. 0.1686 0.0657 0.1659 0.1778
##
## sigma^2 estimated as 0.0008306: log likelihood = 717.81, aic = -1425.62
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0006856952 0.02873663 0.0223985 -0.02094456 0.4818343 0.9646525
## ACF1
## Training set 0.003051145
# residual plot of the chosen model
checkresiduals(fed.arima)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,2,2)
## Q* = 20.949, df = 20, p-value = 0.4001
##
## Model df: 4. Total lags used: 24
# p-value of .4001 from Ljung-Box test.
accuracy(fed.arima)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0006856952 0.02873663 0.0223985 -0.02094456 0.4818343 0.9646525
## ACF1
## Training set 0.003051145
# RMSE value = 0.0287
# MAPE = 0.4818
# forecast plot
fed.arima %>%
forecast() %>%
autoplot()

# at this point we have the parameter estimates and we can produce a forecast