#Plotting time series 

library(tseries)
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(urca)
library(forecast)
## Registered S3 methods overwritten by 'forecast':
##   method             from    
##   fitted.fracdiff    fracdiff
##   residuals.fracdiff fracdiff
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
loans <- ts(c(46.5, 47, 47.5, 48.3, 49.1, 50.1, 51.1, 52, 53.2, 53.9, 54.5, 55.2, 55.6, 55.7, 56.1,
56.8, 57.5, 58.3, 58.9, 59.4, 59.8, 60, 60, 60.3, 60.1, 59.7, 59.5, 59.4, 59.3, 59.2,
59.1, 59, 59.3, 59.5, 59.5, 59.5, 59.7, 59.7, 60.5, 60.7, 61.3, 61.4, 61.8,
62.4, 62.4, 62.9, 63.2, 63.4, 63.9, 64.5, 65, 65.4, 66.3, 67.7, 69, 70, 71.4,
72.5, 73.4, 74.6, 75.2, 75.9, 76.8, 77.9, 79.2, 80.5, 82.6, 84.4, 85.9, 87.6) ,frequency = 12)

##Removing Seasonality

loans %>% stl(s.window='periodic') %>% seasadj() -> loans_adj
par(mar=c(3,3,3,3))
par(mfrow = c(1,3))
plot.ts(loans_adj)
acf(loans_adj)
pacf(loans_adj)

#Lets check for the stationarity of the time series 'loans_adj'
##Augmented Dickey-Fuller Test 
adf.test(loans_adj)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  loans_adj
## Dickey-Fuller = -0.39076, Lag order = 4, p-value = 0.984
## alternative hypothesis: stationary
#Here, we see that the p-value is greater than the alpha value. Hence, we do not have enough evidence to reject null hypothsis. Given time series 'loans' is non-stationary. We will have to check if the time series 'loans' is homogenous non-stationary or not. 

##Augmented Dickey-Fuller Test Unit Root Test
test <- ur.df(loans, type = "trend", lags = 3)
summary(test)
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression trend 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55248 -0.21105  0.03189  0.18370  0.80280 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.310572   0.646177   0.481   0.6325    
## z.lag.1     -0.007655   0.013711  -0.558   0.5787    
## tt           0.006546   0.005528   1.184   0.2410    
## z.diff.lag1  0.529458   0.126175   4.196  9.1e-05 ***
## z.diff.lag2  0.244167   0.142626   1.712   0.0921 .  
## z.diff.lag3  0.150163   0.135170   1.111   0.2710    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2879 on 60 degrees of freedom
## Multiple R-squared:  0.7386, Adjusted R-squared:  0.7169 
## F-statistic: 33.92 on 5 and 60 DF,  p-value: 2.789e-16
## 
## 
## Value of test-statistic is: -0.5583 1.5059 1.7879 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau3 -4.04 -3.45 -3.15
## phi2  6.50  4.88  4.16
## phi3  8.73  6.49  5.47
#Since the value of test statistic -3908 is greater than -3.15, we cannot reject the null hypothesis of non-stationarity.
##Performing KPSS Unit root Test 
KPSS <- ur.kpss(loans_adj)
summary(KPSS)
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 3 lags. 
## 
## Value of test-statistic is: 1.6054 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739
#Since, test statistic value 1.6054 is greater than 0.347 we reject the null hypothesis of stationarity

##We will have to transform the time series to stationary series by taking the difference for modelling purposes.

loans_adj %>% diff() %>% ggtsdisplay(main="")

#To check if the first order difference is stationary. Let us do ADF test. 

loans_adj %>% diff() %>% adf.test()
## 
##  Augmented Dickey-Fuller Test
## 
## data:  .
## Dickey-Fuller = -1.5927, Lag order = 4, p-value = 0.7407
## alternative hypothesis: stationary
#Since p-vale is greater than alpha, we cannot reject the null-hypothesis of non-stationarity.
##We will have to take second order difference.

loans_adj %>% diff() %>% diff() %>% ggtsdisplay(main="")

#Let us do the ADF test for the second order difference of time series loans_adj. 

loans_adj %>% diff() %>% diff() %>% adf.test()
## Warning in adf.test(.): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  .
## Dickey-Fuller = -4.199, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
#The p value is less than the alpha value, we now reject the null hypothesis of non-stationarity. We also observe that the ACF is getting cut at 1 after tkaing the difference. Therefore, the time series can be a M.A(1) model after taking the difference. 

## Let us now fit this time series into model and estimate its parameters.

loans_fit <- Arima(loans_adj,order = c(0,2,1),include.constant = TRUE)
loans_fit
## Series: loans_adj 
## ARIMA(0,2,1) 
## 
## Coefficients:
##           ma1
##       -0.4778
## s.e.   0.1050
## 
## sigma^2 estimated as 0.09123:  log likelihood=-14.68
## AIC=33.36   AICc=33.55   BIC=37.8
#The estimated value of the parameter theta1 is -0.4778.

##Let us check with few other models to check if we are getting the lowest AIC and BIC values. 

loans_fit2 <- Arima(loans_adj,order = c(1,2,1),include.constant = TRUE)
loans_fit3 <- Arima(loans_adj,order = c(0,2,2),include.constant = TRUE)

loans_fit2
## Series: loans_adj 
## ARIMA(1,2,1) 
## 
## Coefficients:
##           ar1      ma1
##       -0.3265  -0.1932
## s.e.   0.2453   0.2573
## 
## sigma^2 estimated as 0.09081:  log likelihood=-14.02
## AIC=34.04   AICc=34.41   BIC=40.7
loans_fit3
## Series: loans_adj 
## ARIMA(0,2,2) 
## 
## Coefficients:
##           ma1     ma2
##       -0.5038  0.1325
## s.e.   0.1156  0.1295
## 
## sigma^2 estimated as 0.09129:  log likelihood=-14.19
## AIC=34.38   AICc=34.76   BIC=41.04
#We see that the lowest AIC and BIC values are in the loans_fit i.e. Arima(0,2,1). Let us check for the residuals of the model loans_fit. 

checkresiduals(loans_fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,2,1)
## Q* = 14.118, df = 13, p-value = 0.3656
## 
## Model df: 1.   Total lags used: 14
#The property of white noise is shown by the residuals of the model. The plot of ACF shows that they are uncorrelated and histogram shows the normality. 

autoplot(loans_fit)

#The autoplot shows that the MA root of our fitted lies within the unit root circle showing that the time series loans_fit is stationarity in nature

##Let us try and check using the auto.arima function to verify the model 

loans_auto <- auto.arima(loans_adj)
loans_auto
## Series: loans_adj 
## ARIMA(0,2,1)(1,0,0)[12] 
## 
## Coefficients:
##           ma1    sar1
##       -0.5122  0.2058
## s.e.   0.1047  0.1261
## 
## sigma^2 estimated as 0.08846:  log likelihood=-13.4
## AIC=32.8   AICc=33.18   BIC=39.46
#The results of auto.arima function shows that the model chosen is correct i.e. Arima(0,2,1)

##Now, we conclude that the given time series loans_adj is an Arima(0,2,1) model and below is its estimated parameter

###theta1 = -0.5122 
#Let us now find the forecast using this model for next 24 time points in futre 

forecast(loans_fit)
##       Point Forecast     Lo 80     Hi 80    Lo 95     Hi 95
## Nov 6       88.21484  87.82776  88.60191 87.62285  88.80682
## Dec 6       89.69301  88.98804  90.39798 88.61486  90.77117
## Jan 7       91.17119  90.11140  92.23098 89.55039  92.79199
## Feb 7       92.64937  91.19677  94.10197 90.42781  94.87092
## Mar 7       94.12754  92.24623  96.00886 91.25032  97.00477
## Apr 7       95.60572  93.26205  97.94939 92.02139  99.19006
## May 7       97.08390  94.24629  99.92151 92.74414 101.42366
## Jun 7       98.56208  95.20070 101.92345 93.42130 103.70286
## Jul 7      100.04025  96.12683 103.95368 94.05520 106.02531
## Aug 7      101.51843  97.02602 106.01085 94.64788 108.38899
## Sep 7      102.99661  97.89942 108.09380 95.20113 110.79209
## Oct 7      104.47479  98.74807 110.20150 95.71653 113.23304
## Nov 7      105.95296  99.57290 112.33303 96.19550 115.71043
## Dec 7      107.43114 100.37474 114.48755 96.63930 118.22298
## Jan 8      108.90932 101.15432 116.66432 97.04908 120.76956
## Feb 8      110.38750 101.91235 118.86265 97.42587 123.34912
## Mar 8      111.86567 102.64942 121.08193 97.77064 125.96071
## Apr 8      113.34385 103.36613 123.32157 98.08425 128.60346
## May 8      114.82203 104.06300 125.58106 98.36751 131.27655
## Jun 8      116.30021 104.74051 127.85990 98.62118 133.97923
## Jul 8      117.77838 105.39913 130.15764 98.84595 136.71082
## Aug 8      119.25656 106.03928 132.47385 99.04247 139.47066
## Sep 8      120.73474 106.66134 134.80814 99.21134 142.25814
## Oct 8      122.21292 107.26571 137.16013 99.35314 145.07270
##we observed that for the next 24 months, at a 95% confidence level we can expect monthly volume of commercial bank real-estate loans to range from 100 to 145 billions of dollars.
autoplot(forecast(loans_fit,h=24))