library(Quandl)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library("forecast")
## Loading required package: timeDate
## This is forecast 6.2
library("tseries")
cur_1<-read.csv("C:\\Users\\ASMSHAKIL\\Desktop\\FRED-UNRATE.csv", header=TRUE, sep=",")
cur<- ts(cur_1[,2], start=c(1948,1), frequency = 12)
cur1<- window(cur, end = c(2014,12))
cur2<- window(cur, start=c(2015,1))
tsdisplay(cur1,xlab="Monthly Time period (1948-2014)", ylab= "Civilian Unemployment Rate", main="Civilian Unemployment Rate (Monthly)")
Based on the plot, we can say that there is no exponential component in the data. Based on the ACF (very slow decay) and the PACF, the level data does not look stationary. Now, let's check the stationarity of the data with the ADF and KPSS tests.
adf.test(cur1)
## Warning in adf.test(cur1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: cur1
## Dickey-Fuller = -4.1315, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(cur1, null="Trend")
## Warning in kpss.test(cur1, null = "Trend"): p-value smaller than printed p-
## value
##
## KPSS Test for Trend Stationarity
##
## data: cur1
## KPSS Trend = 0.48338, Truncation lag parameter = 6, p-value = 0.01
The ADF test suggests that the data is stationary at the level (p-value = 0.01), but the KPSS test rejects trend stationarity (p-value = 0.01), suggesting that the data needs differencing to be stationary (which supports what the ACF and PACF plots show at the level).
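As a quick cross-check (not part of the original analysis), the forecast package also provides ndiffs(), which estimates the number of ordinary differences a series needs; a minimal sketch, using the KPSS- and ADF-based variants:
# Estimated number of ordinary differences needed (KPSS-based unit-root test)
ndiffs(cur1, test = "kpss")
# ADF-based version, for comparison
ndiffs(cur1, test = "adf")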
Based on the KPSS test, if we difference the data and run the ADF and KPSS tests again, we get the following results:
dcur1<-diff(cur1)
tsdisplay(dcur1,xlab="Monthly Time period (1948-2014)", ylab= "Differenced Civilian Unemployment Rate", main="Differenced Civilian Unemployment Rate (Monthly)")
adf.test(dcur1)
## Warning in adf.test(dcur1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: dcur1
## Dickey-Fuller = -7.8935, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(dcur1, null="Trend")
## Warning in kpss.test(dcur1, null = "Trend"): p-value greater than printed
## p-value
##
## KPSS Test for Trend Stationarity
##
## data: dcur1
## KPSS Trend = 0.033526, Truncation lag parameter = 6, p-value = 0.1
Here, based on the ADF test, we can see that the data becomes stationary after differencing (p-value = 0.01). The KPSS test agrees: with a p-value of 0.1, we fail to reject trend stationarity of the differenced series.
sdcur1<-diff(diff(cur1),12)
tsdisplay(sdcur1,xlab="Monthly Time period (1948-2014)", ylab= "Seasonally Differenced Civilian Unemployment Rate", main="Seasonally Differenced Civilian Unemployment Rate (Monthly)")
adf.test(sdcur1)
## Warning in adf.test(sdcur1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: sdcur1
## Dickey-Fuller = -9.5457, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(sdcur1, null="Trend")
## Warning in kpss.test(sdcur1, null = "Trend"): p-value greater than printed
## p-value
##
## KPSS Test for Trend Stationarity
##
## data: sdcur1
## KPSS Trend = 0.012341, Truncation lag parameter = 6, p-value = 0.1
With the seasonal difference added, the data is again stationary by both the ADF and KPSS tests.
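As a similar cross-check, forecast's nsdiffs() estimates the number of seasonal differences required; a minimal sketch (the default seasonal unit-root test varies across forecast versions):
# Estimated number of seasonal differences for the monthly (frequency 12) series
nsdiffs(cur1)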
If we decide to work with the first-differenced series only, then based on its ACF and PACF we can consider the following candidate models:
sar3ma3<-arima(cur1,order=c(3,1,0),seasonal=list(order=c(0,0,3),period=12))
sar3ma3
##
## Call:
## arima(x = cur1, order = c(3, 1, 0), seasonal = list(order = c(0, 0, 3), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 sma1 sma2 sma3
## 0.0226 0.2593 0.2013 -0.2419 -0.1909 -0.1210
## s.e. 0.0347 0.0335 0.0349 0.0382 0.0393 0.0386
##
## sigma^2 estimated as 0.03571: log likelihood = 196.75, aic = -379.5
tsdiag(sar3ma3, gof.lag=24)
sar5ma4<-arima(cur1,order=c(5,1,0),seasonal=list(order=c(0,0,4),period=12))
sar5ma4
##
## Call:
## arima(x = cur1, order = c(5, 1, 0), seasonal = list(order = c(0, 0, 4), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 sma1 sma2 sma3
## -0.0091 0.2059 0.1752 0.1055 0.1216 -0.2685 -0.1966 -0.0971
## s.e. 0.0351 0.0353 0.0353 0.0358 0.0355 0.0389 0.0402 0.0400
## sma4
## 0.0033
## s.e. 0.0399
##
## sigma^2 estimated as 0.03483: log likelihood = 206.68, aic = -393.37
tsdiag(sar5ma4, gof.lag=24)
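tsdiag() plots the standardized residuals, the residual ACF, and Ljung-Box p-values up to gof.lag. The same Ljung-Box test can also be run directly with Box.test(); a minimal sketch for the ARIMA(5,1,0)(0,0,4)[12] fit, with fitdf set to its nine estimated coefficients:
# Ljung-Box test on the residuals at lag 24, adjusting the
# degrees of freedom for the nine estimated parameters
Box.test(residuals(sar5ma4), lag = 24, type = "Ljung-Box", fitdf = 9)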
Now, if we model the data with a seasonal difference as well, we get the following results:
sar3_1ma4_2<-arima(cur1,order=c(3,1,4),seasonal=list(order=c(1,1,2),period=12))
## Warning in arima(cur1, order = c(3, 1, 4), seasonal = list(order = c(1, :
## possible convergence problem: optim gave code = 1
sar3_1ma4_2
##
## Call:
## arima(x = cur1, order = c(3, 1, 4), seasonal = list(order = c(1, 1, 2), period = 12))
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ar2 ar3 ma1 ma2 ma3 ma4 sar1
## -0.2859 0.0082 0.8343 0.2712 0.2131 -0.5877 0.2121 0.5483
## s.e. NaN NaN NaN NaN 0.0469 NaN NaN 0.0812
## sma1 sma2
## -1.8061 0.8098
## s.e. 0.0604 0.0596
##
## sigma^2 estimated as 0.03566: log likelihood = 164.88, aic = -307.76
tsdiag(sar3_1ma4_2, gof.lag=24)
Based on the above models and the adequacy tests (Ljung-Box statistics, standardized residuals, AIC, and BIC), ARIMA(3,1,0)(0,0,3)[12] and ARIMA(3,1,4)(1,1,2)[12] are the best models for forecasting. Note, however, that the seasonal model produced a convergence warning and NaN standard errors, so its estimates should be treated with some caution.
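To make the AIC/BIC comparison explicit, the information criteria can be pulled straight from the fitted model objects; a minimal sketch using base R's AIC() and BIC():
# Information criteria for the candidate models (lower is better)
sapply(list(sar3ma3 = sar3ma3, sar5ma4 = sar5ma4, sar3_1ma4_2 = sar3_1ma4_2),
       function(fit) c(AIC = AIC(fit), BIC = BIC(fit)))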
Based on the above models, the forecasts are:
sar3ma3.fcast <- forecast(sar3ma3, h=24)
plot(sar3ma3.fcast, xlim=c(2010,2016))
sar3_1ma4_2.fcast <- forecast(sar3_1ma4_2, h=24)
plot(sar3_1ma4_2.fcast, xlim=c(2010,2016))
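Since the series was split into a training window (cur1) and a hold-out window (cur2, January 2015 onward), the forecasts can also be scored out of sample with forecast's accuracy(); a minimal sketch:
# Out-of-sample accuracy of each model against the 2015+ hold-out
accuracy(sar3ma3.fcast, cur2)
accuracy(sar3_1ma4_2.fcast, cur2)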