library(forecast)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: timeDate
## This is forecast 6.2
library(zoo)
library(tseries)
#Problem 2
  #Consider the monthly time series for Civilian Unemployment   Rate, FRED/UNRATE and do;
  #Test it for nonstationarity using ADF and KPPS tests.
  #Build a Time Series Until the End of 2014 following the Box Jenkings Methodology
  # Forecast until the end of 2016 once you have checked the model adequacy.

METHODOLOGY

cur_d <- read.csv(file= "C:\\Users\\t420\\Desktop\\Berojgar.csv", header=TRUE, sep=",")

cur <- ts(cur_d[,2],start=c(1948,1), frequency=12)

str(cur)
##  Time-Series [1:817] from 1948 to 2016: 4.9 5 5 5 5.1 5.1 5.3 5.3 5.5 5.4 ...
head(cur)
## [1] 4.9 5.0 5.0 5.0 5.1 5.1
tail(cur)
## [1] 3.6 3.5 3.9 4.0 3.8 3.4
#Splitting the data into two parts
cur_all <- cur
cur_1 <- window(cur_all, end=c(2014,12))

cur_2 <- window(cur_all, start=c(2015,1))
tsdisplay(cur_1, lag.max = 75, xlab=" Years", ylab="Civilian Unemployment Rate", main=" Monthly Civilian Unemployment Rate from 1948 to 2016")

# The plots above shows there is slow decay of data. Let's check the stationarity of data using ADF and KPSS test below.
adf.test(cur_1)
## Warning in adf.test(cur_1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  cur_1
## Dickey-Fuller = -4.1315, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
# The P-value of ADF is 0.01. That means the data is stationarity.
kpss.test(cur_1,null="Trend")
## Warning in kpss.test(cur_1, null = "Trend"): p-value smaller than printed
## p-value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  cur_1
## KPSS Trend = 0.48338, Truncation lag parameter = 6, p-value = 0.01
#As p-value is less than level of significance. We reject the null hypothesis of stationarity of data. We need to check the differencing.
#First Differencing Test
dlcur_1 <- diff(cur_1)
par(mfrow=c(2,1))
plot (cur_1,xlab=" Years 1948 to 2014", ylab="Civilian Unemployment Rate", main="Civilian Unemployment Rate from 1948 to 2014")
tsdisplay (dlcur_1,xlab=" Years 1948 to 2014", ylab=" Transformed Civilian Unemployment Rate", main="First Difference Civilian Unemployment Rate ")

#checking ADF test for stationarity
adf.test(dlcur_1)
## Warning in adf.test(dlcur_1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  dlcur_1
## Dickey-Fuller = -7.8935, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
#p value is less than level of significance and we reject null hypothesis. That means the data is stationarity.
#checking KPSS test for stationarity
kpss.test(dlcur_1,null="Trend")
## Warning in kpss.test(dlcur_1, null = "Trend"): p-value greater than printed
## p-value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  dlcur_1
## KPSS Trend = 0.033526, Truncation lag parameter = 6, p-value = 0.1
#P value is greater than level of significance and we fail to reject the null hypothesis and the data is stationary.
# Even after, first differencing, seasonal pattern persists. Therefore, let's adjust that.

sdcur_1 <- diff(diff(cur_1),12)
acf(as.data.frame(sdcur_1),type = 'correlation',lag = 36,main="sample ACF")

acf(as.data.frame(sdcur_1),type = 'partial',lag = 36, main="simple PACF")

arm <- arima(sdcur_1,order=c(4,0,1),seasonal=list(order=c(0,0,2),period=12))
arm
## 
## Call:
## arima(x = sdcur_1, order = c(4, 0, 1), seasonal = list(order = c(0, 0, 2), period = 12))
## 
## Coefficients:
##          ar1     ar2     ar3     ar4      ma1     sma1    sma2  intercept
##       0.4855  0.2004  0.0734  0.0397  -0.4776  -1.2895  0.2895     -3e-04
## s.e.  0.1384  0.0401  0.0514  0.0511   0.1347   0.0557  0.0531      7e-04
## 
## sigma^2 estimated as 0.03728:  log likelihood = 148.89,  aic = -279.77
tsdiag(arm,gof.lag=36)

arm_1 <- arima(sdcur_1,order=c(5,0,1),seasonal=list(order=c(0,0,5),period=12))
arm
## 
## Call:
## arima(x = sdcur_1, order = c(4, 0, 1), seasonal = list(order = c(0, 0, 2), period = 12))
## 
## Coefficients:
##          ar1     ar2     ar3     ar4      ma1     sma1    sma2  intercept
##       0.4855  0.2004  0.0734  0.0397  -0.4776  -1.2895  0.2895     -3e-04
## s.e.  0.1384  0.0401  0.0514  0.0511   0.1347   0.0557  0.0531      7e-04
## 
## sigma^2 estimated as 0.03728:  log likelihood = 148.89,  aic = -279.77
tsdiag(arm_1,gof.lag=36)

#As ARIMA(5,0,1)(0,0,5) has low BIC value and p-values are greater than 0.6 in Ljung Box model. So ARIMA(5,0,1)(0,0,5) is adequate.
arm_1.LB <- Box.test(arm_1$residuals, lag=24, type="Ljung") 
arm_1.LB
## 
##  Box-Ljung test
## 
## data:  arm_1$residuals
## X-squared = 19.59, df = 24, p-value = 0.7199
arm_1.fcast <- forecast(arm_1,h=24)
plot(arm_1.fcast, xlim=c(2009,2016))