library(Quandl)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library("forecast")
## Loading required package: timeDate
## This is forecast 6.2
library("tseries")
cur_1<-read.csv("C:\\Users\\ASMSHAKIL\\Desktop\\FRED-UNRATE.csv", header=TRUE, sep=",")
cur<- ts(cur_1[,2], start=c(1948,1), frequency = 12)
cur1<- window(cur, end = c(2014,12))
cur2<- window(cur, start=c(2015,1))
tsdisplay(cur1,xlab="Monthly Time period (1948-2014)", ylab= "Civilian Unemployment Rate", main="Civilian Unemployment Rate (Monthly)")

Based on the plot, we can say that there is no exponential component in the data.Based on the ACF(very slow decay) and PACF, the level data doesn’t look stationary. Now, let’s check the stationarity of the data based on ADF and KPSS tests.

ADF and KPSS Test:

1. Level Data:

adf.test(cur1)
## Warning in adf.test(cur1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  cur1
## Dickey-Fuller = -4.1315, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(cur1, null="Trend")
## Warning in kpss.test(cur1, null = "Trend"): p-value smaller than printed p-
## value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  cur1
## KPSS Trend = 0.48338, Truncation lag parameter = 6, p-value = 0.01

ADF test suggests that the data is stationary on the level (p-value=0.01). The KPSS test suggests that the data needs differencing to be stationary (which supports the ACF and PACF plot on the level).

2. On differenced data:

Based on the KPSS test, if we difference the data and run the ADF and KPSS tests again, we get the following results:

dcur1<-diff(cur1)
tsdisplay(dcur1,xlab="Monthly Time period (1948-2014)", ylab= "Differenced Civilian Unemployment Rate", main="Differenced Civilian Unemployment Rate (Monthly)")

adf.test(dcur1)
## Warning in adf.test(dcur1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  dcur1
## Dickey-Fuller = -7.8935, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(dcur1, null="Trend")
## Warning in kpss.test(dcur1, null = "Trend"): p-value greater than printed
## p-value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  dcur1
## KPSS Trend = 0.033526, Truncation lag parameter = 6, p-value = 0.1

Here, based on the ADF test, we can see the data becomes stationary after difference. Furthermore, KPSS test also suggests that, the data becomes stationary after differencing.

sdcur1<-diff(diff(cur1),12)
tsdisplay(sdcur1,xlab="Monthly Time period (1948-2014)", ylab= "Seasonally Differenced Civilian Unemployment Rate", main="Seasonally Differenced Civilian Unemployment Rate (Monthly)")

adf.test(sdcur1)
## Warning in adf.test(sdcur1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  sdcur1
## Dickey-Fuller = -9.5457, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(sdcur1, null="Trend")
## Warning in kpss.test(sdcur1, null = "Trend"): p-value greater than printed
## p-value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  sdcur1
## KPSS Trend = 0.012341, Truncation lag parameter = 6, p-value = 0.1

With seasonal differencing, the data is again Stationary by ADF and KPSS test.

Model Estimation:

If we decide to take the differenced series only, then based on ACF and PACF we can decide about a model, we can think about the following models:

sar3ma3<-arima(cur1,order=c(3,1,0),seasonal=list(order=c(0,0,3),period=12))
sar3ma3
## 
## Call:
## arima(x = cur1, order = c(3, 1, 0), seasonal = list(order = c(0, 0, 3), period = 12))
## 
## Coefficients:
##          ar1     ar2     ar3     sma1     sma2     sma3
##       0.0226  0.2593  0.2013  -0.2419  -0.1909  -0.1210
## s.e.  0.0347  0.0335  0.0349   0.0382   0.0393   0.0386
## 
## sigma^2 estimated as 0.03571:  log likelihood = 196.75,  aic = -379.5
tsdiag(sar3ma3, gof.lag=24)

sar3ma3<-arima(cur1,order=c(3,1,0),seasonal=list(order=c(0,0,3),period=12))
sar3ma3
## 
## Call:
## arima(x = cur1, order = c(3, 1, 0), seasonal = list(order = c(0, 0, 3), period = 12))
## 
## Coefficients:
##          ar1     ar2     ar3     sma1     sma2     sma3
##       0.0226  0.2593  0.2013  -0.2419  -0.1909  -0.1210
## s.e.  0.0347  0.0335  0.0349   0.0382   0.0393   0.0386
## 
## sigma^2 estimated as 0.03571:  log likelihood = 196.75,  aic = -379.5
tsdiag(sar3ma3, gof.lag=24)

sar5ma4<-arima(cur1,order=c(5,1,0),seasonal=list(order=c(0,0,4),period=12))
sar5ma4
## 
## Call:
## arima(x = cur1, order = c(5, 1, 0), seasonal = list(order = c(0, 0, 4), period = 12))
## 
## Coefficients:
##           ar1     ar2     ar3     ar4     ar5     sma1     sma2     sma3
##       -0.0091  0.2059  0.1752  0.1055  0.1216  -0.2685  -0.1966  -0.0971
## s.e.   0.0351  0.0353  0.0353  0.0358  0.0355   0.0389   0.0402   0.0400
##         sma4
##       0.0033
## s.e.  0.0399
## 
## sigma^2 estimated as 0.03483:  log likelihood = 206.68,  aic = -393.37
tsdiag(sar5ma4, gof.lag=24)

Now, if we take the data with seasonal difference, we get the following results:

sar3_1ma4_2<-arima(cur1,order=c(3,1,4),seasonal=list(order=c(1,1,2),period=12))
## Warning in arima(cur1, order = c(3, 1, 4), seasonal = list(order = c(1, :
## possible convergence problem: optim gave code = 1
sar3_1ma4_2
## 
## Call:
## arima(x = cur1, order = c(3, 1, 4), seasonal = list(order = c(1, 1, 2), period = 12))
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##           ar1     ar2     ar3     ma1     ma2      ma3     ma4    sar1
##       -0.2859  0.0082  0.8343  0.2712  0.2131  -0.5877  0.2121  0.5483
## s.e.      NaN     NaN     NaN     NaN  0.0469      NaN     NaN  0.0812
##          sma1    sma2
##       -1.8061  0.8098
## s.e.   0.0604  0.0596
## 
## sigma^2 estimated as 0.03566:  log likelihood = 164.88,  aic = -307.76
tsdiag(sar3_1ma4_2, gof.lag=24)

Based on the above models and the adequacy tests(L-Jungbox, Standardized residuals, AIC and BIC), ARIMA(3,1,0)(0,0,1) and ARIMA(3.1.4)(1,1,2) are the best models for forecasting.

Forecasting:

Based on the above models, the forecasts are:

sar3ma3.fcast <- forecast(sar3ma3, h=24)
plot(sar3ma3.fcast, xlim=c(2010,2016))

sar3_1ma4_2.fcast <- forecast(sar3_1ma4_2, h=24)
plot(sar3_1ma4_2.fcast, xlim=c(2010,2016))