#Plotting time series
library(tseries)
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(urca)
library(forecast)
## Registered S3 methods overwritten by 'forecast':
## method from
## fitted.fracdiff fracdiff
## residuals.fracdiff fracdiff
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
loans <- ts(c(46.5, 47, 47.5, 48.3, 49.1, 50.1, 51.1, 52, 53.2, 53.9, 54.5, 55.2, 55.6, 55.7, 56.1,
56.8, 57.5, 58.3, 58.9, 59.4, 59.8, 60, 60, 60.3, 60.1, 59.7, 59.5, 59.4, 59.3, 59.2,
59.1, 59, 59.3, 59.5, 59.5, 59.5, 59.7, 59.7, 60.5, 60.7, 61.3, 61.4, 61.8,
62.4, 62.4, 62.9, 63.2, 63.4, 63.9, 64.5, 65, 65.4, 66.3, 67.7, 69, 70, 71.4,
72.5, 73.4, 74.6, 75.2, 75.9, 76.8, 77.9, 79.2, 80.5, 82.6, 84.4, 85.9, 87.6) ,frequency = 12)
##Removing Seasonality
loans %>% stl(s.window='periodic') %>% seasadj() -> loans_adj
par(mar=c(3,3,3,3))
par(mfrow = c(1,3))
plot.ts(loans_adj)
acf(loans_adj)
pacf(loans_adj)

#Lets check for the stationarity of the time series 'loans_adj'
##Augmented Dickey-Fuller Test
adf.test(loans_adj)
##
## Augmented Dickey-Fuller Test
##
## data: loans_adj
## Dickey-Fuller = -0.39076, Lag order = 4, p-value = 0.984
## alternative hypothesis: stationary
#Here, we see that the p-value is greater than the alpha value. Hence, we do not have enough evidence to reject null hypothsis. Given time series 'loans' is non-stationary. We will have to check if the time series 'loans' is homogenous non-stationary or not.
##Augmented Dickey-Fuller Test Unit Root Test
test <- ur.df(loans, type = "trend", lags = 3)
summary(test)
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55248 -0.21105 0.03189 0.18370 0.80280
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.310572 0.646177 0.481 0.6325
## z.lag.1 -0.007655 0.013711 -0.558 0.5787
## tt 0.006546 0.005528 1.184 0.2410
## z.diff.lag1 0.529458 0.126175 4.196 9.1e-05 ***
## z.diff.lag2 0.244167 0.142626 1.712 0.0921 .
## z.diff.lag3 0.150163 0.135170 1.111 0.2710
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2879 on 60 degrees of freedom
## Multiple R-squared: 0.7386, Adjusted R-squared: 0.7169
## F-statistic: 33.92 on 5 and 60 DF, p-value: 2.789e-16
##
##
## Value of test-statistic is: -0.5583 1.5059 1.7879
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -4.04 -3.45 -3.15
## phi2 6.50 4.88 4.16
## phi3 8.73 6.49 5.47
#Since the value of test statistic -3908 is greater than -3.15, we cannot reject the null hypothesis of non-stationarity.
##Performing KPSS Unit root Test
KPSS <- ur.kpss(loans_adj)
summary(KPSS)
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 3 lags.
##
## Value of test-statistic is: 1.6054
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
#Since, test statistic value 1.6054 is greater than 0.347 we reject the null hypothesis of stationarity
##We will have to transform the time series to stationary series by taking the difference for modelling purposes.
loans_adj %>% diff() %>% ggtsdisplay(main="")

#To check if the first order difference is stationary. Let us do ADF test.
loans_adj %>% diff() %>% adf.test()
##
## Augmented Dickey-Fuller Test
##
## data: .
## Dickey-Fuller = -1.5927, Lag order = 4, p-value = 0.7407
## alternative hypothesis: stationary
#Since p-vale is greater than alpha, we cannot reject the null-hypothesis of non-stationarity.
##We will have to take second order difference.
loans_adj %>% diff() %>% diff() %>% ggtsdisplay(main="")

#Let us do the ADF test for the second order difference of time series loans_adj.
loans_adj %>% diff() %>% diff() %>% adf.test()
## Warning in adf.test(.): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: .
## Dickey-Fuller = -4.199, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#The p value is less than the alpha value, we now reject the null hypothesis of non-stationarity. We also observe that the ACF is getting cut at 1 after tkaing the difference. Therefore, the time series can be a M.A(1) model after taking the difference.
## Let us now fit this time series into model and estimate its parameters.
loans_fit <- Arima(loans_adj,order = c(0,2,1),include.constant = TRUE)
loans_fit
## Series: loans_adj
## ARIMA(0,2,1)
##
## Coefficients:
## ma1
## -0.4778
## s.e. 0.1050
##
## sigma^2 estimated as 0.09123: log likelihood=-14.68
## AIC=33.36 AICc=33.55 BIC=37.8
#The estimated value of the parameter theta1 is -0.4778.
##Let us check with few other models to check if we are getting the lowest AIC and BIC values.
loans_fit2 <- Arima(loans_adj,order = c(1,2,1),include.constant = TRUE)
loans_fit3 <- Arima(loans_adj,order = c(0,2,2),include.constant = TRUE)
loans_fit2
## Series: loans_adj
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## -0.3265 -0.1932
## s.e. 0.2453 0.2573
##
## sigma^2 estimated as 0.09081: log likelihood=-14.02
## AIC=34.04 AICc=34.41 BIC=40.7
loans_fit3
## Series: loans_adj
## ARIMA(0,2,2)
##
## Coefficients:
## ma1 ma2
## -0.5038 0.1325
## s.e. 0.1156 0.1295
##
## sigma^2 estimated as 0.09129: log likelihood=-14.19
## AIC=34.38 AICc=34.76 BIC=41.04
#We see that the lowest AIC and BIC values are in the loans_fit i.e. Arima(0,2,1). Let us check for the residuals of the model loans_fit.
checkresiduals(loans_fit)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,2,1)
## Q* = 14.118, df = 13, p-value = 0.3656
##
## Model df: 1. Total lags used: 14
#The property of white noise is shown by the residuals of the model. The plot of ACF shows that they are uncorrelated and histogram shows the normality.
autoplot(loans_fit)

#The autoplot shows that the MA root of our fitted lies within the unit root circle showing that the time series loans_fit is stationarity in nature
##Let us try and check using the auto.arima function to verify the model
loans_auto <- auto.arima(loans_adj)
loans_auto
## Series: loans_adj
## ARIMA(0,2,1)(1,0,0)[12]
##
## Coefficients:
## ma1 sar1
## -0.5122 0.2058
## s.e. 0.1047 0.1261
##
## sigma^2 estimated as 0.08846: log likelihood=-13.4
## AIC=32.8 AICc=33.18 BIC=39.46
#The results of auto.arima function shows that the model chosen is correct i.e. Arima(0,2,1)
##Now, we conclude that the given time series loans_adj is an Arima(0,2,1) model and below is its estimated parameter
###theta1 = -0.5122
#Let us now find the forecast using this model for next 24 time points in futre
forecast(loans_fit)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Nov 6 88.21484 87.82776 88.60191 87.62285 88.80682
## Dec 6 89.69301 88.98804 90.39798 88.61486 90.77117
## Jan 7 91.17119 90.11140 92.23098 89.55039 92.79199
## Feb 7 92.64937 91.19677 94.10197 90.42781 94.87092
## Mar 7 94.12754 92.24623 96.00886 91.25032 97.00477
## Apr 7 95.60572 93.26205 97.94939 92.02139 99.19006
## May 7 97.08390 94.24629 99.92151 92.74414 101.42366
## Jun 7 98.56208 95.20070 101.92345 93.42130 103.70286
## Jul 7 100.04025 96.12683 103.95368 94.05520 106.02531
## Aug 7 101.51843 97.02602 106.01085 94.64788 108.38899
## Sep 7 102.99661 97.89942 108.09380 95.20113 110.79209
## Oct 7 104.47479 98.74807 110.20150 95.71653 113.23304
## Nov 7 105.95296 99.57290 112.33303 96.19550 115.71043
## Dec 7 107.43114 100.37474 114.48755 96.63930 118.22298
## Jan 8 108.90932 101.15432 116.66432 97.04908 120.76956
## Feb 8 110.38750 101.91235 118.86265 97.42587 123.34912
## Mar 8 111.86567 102.64942 121.08193 97.77064 125.96071
## Apr 8 113.34385 103.36613 123.32157 98.08425 128.60346
## May 8 114.82203 104.06300 125.58106 98.36751 131.27655
## Jun 8 116.30021 104.74051 127.85990 98.62118 133.97923
## Jul 8 117.77838 105.39913 130.15764 98.84595 136.71082
## Aug 8 119.25656 106.03928 132.47385 99.04247 139.47066
## Sep 8 120.73474 106.66134 134.80814 99.21134 142.25814
## Oct 8 122.21292 107.26571 137.16013 99.35314 145.07270
##we observed that for the next 24 months, at a 95% confidence level we can expect monthly volume of commercial bank real-estate loans to range from 100 to 145 billions of dollars.
autoplot(forecast(loans_fit,h=24))
