List of Packages Used

library(fpp2) 
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✔ ggplot2   3.3.6      ✔ fma       2.4   
## ✔ forecast  8.17.0     ✔ expsmooth 2.3
## 
library(ggplot2) 
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(tseries)
library(readxl) 
library(forecast)
library(seastests)
## Warning: package 'seastests' was built under R version 4.2.2

Import Dataset

data <- read_excel("C:/Users/notebook/Downloads/Aus_UnemploymentRate.xlsx")

#Convert to time series data
data.ts <- ts(data[,-1] , frequency = 12 , start = c(2000,1))
data.xts <- as.xts(data.ts)

Data Visualization

#Construct time series graph
autoplot(data.xts , main = "Australia's Monthly Unemployment Rate from January 2000 to January 2022") +
  labs(x = "Time" , y = "Unemployment Rate (%)") +
  ggeasy::easy_center_title() + 
  geom_line(colour = "blue", size = 1) 

#Construct seasonal plot
ggseasonplot(data.ts, year.labels=TRUE, year.labels.left=TRUE) +
  ylab("Unemployment Rate (%)") +
  ggtitle("Seasonal Plot: Australia's Monthly Unemployment Rate from 2000 to 2022") +
  ggeasy::easy_center_title()

### Data Analysis

#Density of Unemployment Rate
hist(data.ts, col = "lightblue", border = "black", prob = TRUE, xlim = c(3, 8), xlab = "Unemployment Rate (%)", main = "Density of Australia's Monthly Unemployment Rate (2000-2022)")
lines(density(data.ts), lwd = 2, col = "red")

#QQ-Plot of Unemployment Rate
qqnorm(data.ts, pch = 1, frame = FALSE, main="QQ-plot of Australia's Monthly Unemployment Rate (2000-2022)")
qqline(data.ts, col = "red", lwd = 2)

Model Identification (Box-Jenkins Approach)

##Original Data
#Sample ACF and PACF
ggAcf(data.ts,main="Sample ACF of Unemployment Rate") +
  ggeasy::easy_center_title()
## Warning: Ignoring unknown parameters: main

ggPacf(data.ts,main="Sample PACF of Unemployment Rate") +
  ggeasy::easy_center_title()
## Warning: Ignoring unknown parameters: main

#Seasonality test
fried(data.ts,freq=12)
## Test used:  Friedman rank 
##  
## Test statistic:  1.08 
## P-value:  0.9999242
#Stationarity test
kpss.test(data.ts) 
## 
##  KPSS Test for Level Stationarity
## 
## data:  data.ts
## KPSS Level = 0.48255, Truncation lag parameter = 5, p-value = 0.0456
#Check whether differencing is required
nsdiffs(data.ts)
## [1] 0
ndiffs(data.ts)
## [1] 1
##Differenced Data
#First order differencing
diff.data <-diff(data.ts)
ggtsdisplay(diff.data,main = "Differenced Time Series Data from January 2000 to January 2022",lag=50)

#Stationarity test
adf.test(diff.data)
## Warning in adf.test(diff.data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff.data
## Dickey-Fuller = -5.2962, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(diff.data) 
## Warning in kpss.test(diff.data): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff.data
## KPSS Level = 0.080794, Truncation lag parameter = 5, p-value = 0.1

Parameter Estimation

#ARIMA(0,1,0)
arima1 <- Arima(data.ts,order=c(0,1,0),method="ML")
arima1
## Series: data.ts 
## ARIMA(0,1,0) 
## 
## sigma^2 = 0.03339:  log likelihood = 74.13
## AIC=-146.25   AICc=-146.24   BIC=-142.68
#ARIMA(6,1,6)
arima2 <- Arima(data.ts,order=c(6,1,6),method="ML")
arima2
## Series: data.ts 
## ARIMA(6,1,6) 
## 
## Coefficients:
##           ar1     ar2     ar3      ar4      ar5      ar6     ma1      ma2
##       -0.1153  0.1374  0.2483  -0.0704  -0.1972  -0.6495  0.1806  -0.0902
## s.e.   0.1103  0.1015  0.1130   0.1049   0.1025   0.0981  0.0894   0.0761
##           ma3     ma4     ma5     ma6
##       -0.1539  0.0999  0.1798  0.9047
## s.e.   0.0853  0.0763  0.0898  0.0781
## 
## sigma^2 = 0.02986:  log likelihood = 90.22
## AIC=-154.43   AICc=-152.97   BIC=-107.94
#ARIMA(0,1,6)
arima3 <- Arima(data.ts,order=c(0,1,6),method="ML")
arima3
## Series: data.ts 
## ARIMA(0,1,6) 
## 
## Coefficients:
##          ma1     ma2     ma3     ma4     ma5     ma6
##       0.1043  0.0706  0.1118  0.0220  0.0077  0.2512
## s.e.  0.0601  0.0614  0.0607  0.0662  0.0709  0.0770
## 
## sigma^2 = 0.03212:  log likelihood = 82.08
## AIC=-150.17   AICc=-149.73   BIC=-125.14

Diagnostic Checking

#Log-likelihood and AICc value can be obtained from parameter estimation's code
#Compute MSE value
mean(arima1$residuals^2)
## [1] 0.03326602
mean(arima2$residuals^2)
## [1] 0.02839851
mean(arima3$residuals^2)
## [1] 0.03126819

Quadratic Trend Model (Deterministic)

quad <- tslm(data.ts~trend + I(trend^2))

#Plot of actual vs deterministic fitted value
autoplot(data.xts) +
  autolayer(fitted(quad), series="Quadratic Trend ") +
  ggtitle("Australia's Actual VS Predicted Monthly Unemployment Rate ") +
  xlab("Year") + ylab("Unemployment Rate (%)") + xlab("Time") + 
  guides(colour=guide_legend(title="Model")) + 
  ggeasy::easy_center_title() 

### Model Comparison between Deterministic and Stochastic Model

#Construct next 24 months forecast
f.arima4<- forecast(arima2,h=24)
f.det<- forecast(quad,h=24)

#Plot both stochastic and deterministic forecasted values on the same graph
autoplot(data.xts) +
  autolayer(f.arima4, series="ARIMA (6,1,6) ") +
  autolayer(f.det, series="Quadratic Trend ") +
  ggtitle("Forecast of Stochastic VS Deterministic Time Series Model ") +
  xlab("Year") + ylab("Unemployment Rate (%)") + xlab("Time") + 
  guides(colour=guide_legend(title="Model")) + 
  ggeasy::easy_center_title()

#Plot both stochastic and deterministic fitted values on the same graph
autoplot(data.xts) +
  autolayer(fitted(arima2), series="ARIMA (6,1,6) ") +
  autolayer(fitted(quad), series="Quadratic Trend ") +
  ggtitle("Stochastic VS Deterministic Time Series Model ") +
  xlab("Year") + ylab("Unemployment Rate (%)") + xlab("Time") + 
  guides(colour=guide_legend(title="Model")) + 
  ggeasy::easy_center_title()

Diagnostic Checking

#Plot original, ACF and density graph of residuals
checkresiduals(quad)

## 
##  Breusch-Godfrey test for serial correlation of order up to 24
## 
## data:  Residuals from Linear regression model
## LM test = 245.39, df = 24, p-value < 2.2e-16
checkresiduals(arima2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(6,1,6)
## Q* = 11.166, df = 12, p-value = 0.5147
## 
## Model df: 12.   Total lags used: 24
#Ljung-box Test
Box.test(residuals(arima2),lag=24, fitdf=2, type="Lj")
## 
##  Box-Ljung test
## 
## data:  residuals(arima2)
## X-squared = 11.166, df = 22, p-value = 0.9723
#Obtain R-sqaured value of 
summary(quad)
## 
## Call:
## tslm(formula = data.ts ~ trend + I(trend^2))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.86813 -0.44435  0.04955  0.44574  1.66573 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.357e+00  1.167e-01  54.470  < 2e-16 ***
## trend       -1.681e-02  2.026e-03  -8.296 5.73e-15 ***
## I(trend^2)   5.908e-05  7.377e-06   8.008 3.82e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6286 on 262 degrees of freedom
## Multiple R-squared:  0.2081, Adjusted R-squared:  0.202 
## F-statistic: 34.41 on 2 and 262 DF,  p-value: 5.363e-14
#Compute MSE value
mean(quad$residuals^2)
## [1] 0.3906036
mean(arima2$residuals^2)
## [1] 0.02839851

Forecasting

#12 months of unemployment rate forecast with ARIMA (6,1,6) model
f.arima2<- forecast(arima2,h=12)
f.arima2
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Feb 2022       4.059439 3.836457 4.282421 3.718417 4.400461
## Mar 2022       3.961406 3.635639 4.287172 3.463189 4.459623
## Apr 2022       3.940772 3.531438 4.350105 3.314749 4.566794
## May 2022       3.766529 3.276226 4.256831 3.016675 4.516382
## Jun 2022       3.676886 3.113581 4.240190 2.815386 4.538386
## Jul 2022       3.811861 3.184279 4.439443 2.852057 4.771665
## Aug 2022       3.842327 3.131480 4.553174 2.755180 4.929474
## Sep 2022       3.915106 3.137201 4.693012 2.725403 5.104810
## Oct 2022       3.998474 3.159544 4.837404 2.715441 5.281507
## Nov 2022       4.127776 3.233297 5.022256 2.759788 5.495765
## Dec 2022       4.171859 3.229251 5.114466 2.730265 5.613453
## Jan 2023       4.106441 3.120734 5.092148 2.598932 5.613950
autoplot(f.arima2, main="ARIMA (6,1,6) Forecasted 12 Months Australia's Monthly Unemployment Rate ",ylab="Unemployment Rate (%)") +
  ggeasy::easy_center_title()

#Compute forecasted values for the next 8 months
f.a2<- forecast(arima2,h=8)
f.a2
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Feb 2022       4.059439 3.836457 4.282421 3.718417 4.400461
## Mar 2022       3.961406 3.635639 4.287172 3.463189 4.459623
## Apr 2022       3.940772 3.531438 4.350105 3.314749 4.566794
## May 2022       3.766529 3.276226 4.256831 3.016675 4.516382
## Jun 2022       3.676886 3.113581 4.240190 2.815386 4.538386
## Jul 2022       3.811861 3.184279 4.439443 2.852057 4.771665
## Aug 2022       3.842327 3.131480 4.553174 2.755180 4.929474
## Sep 2022       3.915106 3.137201 4.693012 2.725403 5.104810
f.a3<- forecast(arima3,h=8)
f.a3
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Feb 2022       4.114559 3.884891 4.344227 3.763312 4.465806
## Mar 2022       4.105233 3.763067 4.447399 3.581936 4.628530
## Apr 2022       4.254451 3.818675 4.690227 3.587989 4.920913
## May 2022       4.093089 3.566555 4.619624 3.287825 4.898354
## Jun 2022       3.989459 3.383173 4.595745 3.062224 4.916694
## Jul 2022       4.009839 3.332345 4.687332 2.973702 5.045975
## Aug 2022       4.009839 3.242616 4.777061 2.836473 5.183204
## Sep 2022       4.009839 3.162334 4.857343 2.713693 5.305984
#Test on predictive accuracy
dm.test(residuals(arima2),residuals(arima3) , alternative = "greater",h=1)
## 
##  Diebold-Mariano Test
## 
## data:  residuals(arima2)residuals(arima3)
## DM = -2.4983, Forecast horizon = 1, Loss function power = 2, p-value =
## 0.9935
## alternative hypothesis: greater