METHODOLOGY
cur_d <- read.csv(file= "C:\\Users\\t420\\Desktop\\Berojgar.csv", header=TRUE, sep=",")
cur <- ts(cur_d[,2],start=c(1948,1), frequency=12)
str(cur)
## Time-Series [1:817] from 1948 to 2016: 4.9 5 5 5 5.1 5.1 5.3 5.3 5.5 5.4 ...
head(cur)
## [1] 4.9 5.0 5.0 5.0 5.1 5.1
tail(cur)
## [1] 3.6 3.5 3.9 4.0 3.8 3.4
#Splitting the data into two parts
cur_all <- cur
cur_1 <- window(cur_all, end=c(2014,12))
cur_2 <- window(cur_all, start=c(2015,1))
tsdisplay(cur_1, lag.max = 75, xlab=" Years", ylab="Civilian Unemployment Rate", main=" Monthly Civilian Unemployment Rate from 1948 to 2016")

# The plots above shows there is slow decay of data. Let's check the stationarity of data using ADF and KPSS test below.
adf.test(cur_1)
## Warning in adf.test(cur_1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: cur_1
## Dickey-Fuller = -4.1315, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
# The P-value of ADF is 0.01. That means the data is stationarity.
kpss.test(cur_1,null="Trend")
## Warning in kpss.test(cur_1, null = "Trend"): p-value smaller than printed
## p-value
##
## KPSS Test for Trend Stationarity
##
## data: cur_1
## KPSS Trend = 0.48338, Truncation lag parameter = 6, p-value = 0.01
#As p-value is less than level of significance. We reject the null hypothesis of stationarity of data. We need to check the differencing.
#First Differencing Test
dlcur_1 <- diff(cur_1)
par(mfrow=c(2,1))
plot (cur_1,xlab=" Years 1948 to 2014", ylab="Civilian Unemployment Rate", main="Civilian Unemployment Rate from 1948 to 2014")
tsdisplay (dlcur_1,xlab=" Years 1948 to 2014", ylab=" Transformed Civilian Unemployment Rate", main="First Difference Civilian Unemployment Rate ")


#checking ADF test for stationarity
adf.test(dlcur_1)
## Warning in adf.test(dlcur_1): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: dlcur_1
## Dickey-Fuller = -7.8935, Lag order = 9, p-value = 0.01
## alternative hypothesis: stationary
#p value is less than level of significance and we reject null hypothesis. That means the data is stationarity.
#checking KPSS test for stationarity
kpss.test(dlcur_1,null="Trend")
## Warning in kpss.test(dlcur_1, null = "Trend"): p-value greater than printed
## p-value
##
## KPSS Test for Trend Stationarity
##
## data: dlcur_1
## KPSS Trend = 0.033526, Truncation lag parameter = 6, p-value = 0.1
#P value is greater than level of significance and we fail to reject the null hypothesis and the data is stationary.
# Even after, first differencing, seasonal pattern persists. Therefore, let's adjust that.
sdcur_1 <- diff(diff(cur_1),12)
acf(as.data.frame(sdcur_1),type = 'correlation',lag = 36,main="sample ACF")

acf(as.data.frame(sdcur_1),type = 'partial',lag = 36, main="simple PACF")

arm <- arima(sdcur_1,order=c(4,0,1),seasonal=list(order=c(0,0,2),period=12))
arm
##
## Call:
## arima(x = sdcur_1, order = c(4, 0, 1), seasonal = list(order = c(0, 0, 2), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 sma1 sma2 intercept
## 0.4855 0.2004 0.0734 0.0397 -0.4776 -1.2895 0.2895 -3e-04
## s.e. 0.1384 0.0401 0.0514 0.0511 0.1347 0.0557 0.0531 7e-04
##
## sigma^2 estimated as 0.03728: log likelihood = 148.89, aic = -279.77
tsdiag(arm,gof.lag=36)

arm_1 <- arima(sdcur_1,order=c(5,0,1),seasonal=list(order=c(0,0,5),period=12))
arm
##
## Call:
## arima(x = sdcur_1, order = c(4, 0, 1), seasonal = list(order = c(0, 0, 2), period = 12))
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 sma1 sma2 intercept
## 0.4855 0.2004 0.0734 0.0397 -0.4776 -1.2895 0.2895 -3e-04
## s.e. 0.1384 0.0401 0.0514 0.0511 0.1347 0.0557 0.0531 7e-04
##
## sigma^2 estimated as 0.03728: log likelihood = 148.89, aic = -279.77
tsdiag(arm_1,gof.lag=36)

#As ARIMA(5,0,1)(0,0,5) has low BIC value and p-values are greater than 0.6 in Ljung Box model. So ARIMA(5,0,1)(0,0,5) is adequate.
arm_1.LB <- Box.test(arm_1$residuals, lag=24, type="Ljung")
arm_1.LB
##
## Box-Ljung test
##
## data: arm_1$residuals
## X-squared = 19.59, df = 24, p-value = 0.7199
arm_1.fcast <- forecast(arm_1,h=24)
plot(arm_1.fcast, xlim=c(2009,2016))
