library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✔ ggplot2 3.3.6 ✔ fma 2.4
## ✔ forecast 8.17.0 ✔ expsmooth 2.3
##
library(ggplot2)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tseries)
library(readxl)
library(forecast)
library(seastests)
## Warning: package 'seastests' was built under R version 4.2.2
data <- read_excel("C:/Users/notebook/Downloads/Aus_UnemploymentRate.xlsx")
#Convert to time series data
data.ts <- ts(data[,-1] , frequency = 12 , start = c(2000,1))
data.xts <- as.xts(data.ts)
#Construct time series graph
autoplot(data.xts , main = "Australia's Monthly Unemployment Rate from January 2000 to January 2022") +
labs(x = "Time" , y = "Unemployment Rate (%)") +
ggeasy::easy_center_title() +
geom_line(colour = "blue", size = 1)
#Construct seasonal plot
ggseasonplot(data.ts, year.labels=TRUE, year.labels.left=TRUE) +
ylab("Unemployment Rate (%)") +
ggtitle("Seasonal Plot: Australia's Monthly Unemployment Rate from 2000 to 2022") +
ggeasy::easy_center_title()
### Data Analysis
#Density of Unemployment Rate
hist(data.ts, col = "lightblue", border = "black", prob = TRUE, xlim = c(3, 8), xlab = "Unemployment Rate (%)", main = "Density of Australia's Monthly Unemployment Rate (2000-2022)")
lines(density(data.ts), lwd = 2, col = "red")
#QQ-Plot of Unemployment Rate
qqnorm(data.ts, pch = 1, frame = FALSE, main="QQ-plot of Australia's Monthly Unemployment Rate (2000-2022)")
qqline(data.ts, col = "red", lwd = 2)
##Original Data
#Sample ACF and PACF
ggAcf(data.ts,main="Sample ACF of Unemployment Rate") +
ggeasy::easy_center_title()
## Warning: Ignoring unknown parameters: main
ggPacf(data.ts,main="Sample PACF of Unemployment Rate") +
ggeasy::easy_center_title()
## Warning: Ignoring unknown parameters: main
#Seasonality test
fried(data.ts,freq=12)
## Test used: Friedman rank
##
## Test statistic: 1.08
## P-value: 0.9999242
#Stationarity test
kpss.test(data.ts)
##
## KPSS Test for Level Stationarity
##
## data: data.ts
## KPSS Level = 0.48255, Truncation lag parameter = 5, p-value = 0.0456
#Check whether differencing is required
nsdiffs(data.ts)
## [1] 0
ndiffs(data.ts)
## [1] 1
##Differenced Data
#First order differencing
diff.data <-diff(data.ts)
ggtsdisplay(diff.data,main = "Differenced Time Series Data from January 2000 to January 2022",lag=50)
#Stationarity test
adf.test(diff.data)
## Warning in adf.test(diff.data): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: diff.data
## Dickey-Fuller = -5.2962, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
kpss.test(diff.data)
## Warning in kpss.test(diff.data): p-value greater than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: diff.data
## KPSS Level = 0.080794, Truncation lag parameter = 5, p-value = 0.1
#ARIMA(0,1,0)
arima1 <- Arima(data.ts,order=c(0,1,0),method="ML")
arima1
## Series: data.ts
## ARIMA(0,1,0)
##
## sigma^2 = 0.03339: log likelihood = 74.13
## AIC=-146.25 AICc=-146.24 BIC=-142.68
#ARIMA(6,1,6)
arima2 <- Arima(data.ts,order=c(6,1,6),method="ML")
arima2
## Series: data.ts
## ARIMA(6,1,6)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1 ma2
## -0.1153 0.1374 0.2483 -0.0704 -0.1972 -0.6495 0.1806 -0.0902
## s.e. 0.1103 0.1015 0.1130 0.1049 0.1025 0.0981 0.0894 0.0761
## ma3 ma4 ma5 ma6
## -0.1539 0.0999 0.1798 0.9047
## s.e. 0.0853 0.0763 0.0898 0.0781
##
## sigma^2 = 0.02986: log likelihood = 90.22
## AIC=-154.43 AICc=-152.97 BIC=-107.94
#ARIMA(0,1,6)
arima3 <- Arima(data.ts,order=c(0,1,6),method="ML")
arima3
## Series: data.ts
## ARIMA(0,1,6)
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 ma6
## 0.1043 0.0706 0.1118 0.0220 0.0077 0.2512
## s.e. 0.0601 0.0614 0.0607 0.0662 0.0709 0.0770
##
## sigma^2 = 0.03212: log likelihood = 82.08
## AIC=-150.17 AICc=-149.73 BIC=-125.14
#Log-likelihood and AICc value can be obtained from parameter estimation's code
#Compute MSE value
mean(arima1$residuals^2)
## [1] 0.03326602
mean(arima2$residuals^2)
## [1] 0.02839851
mean(arima3$residuals^2)
## [1] 0.03126819
quad <- tslm(data.ts~trend + I(trend^2))
#Plot of actual vs deterministic fitted value
autoplot(data.xts) +
autolayer(fitted(quad), series="Quadratic Trend ") +
ggtitle("Australia's Actual VS Predicted Monthly Unemployment Rate ") +
xlab("Year") + ylab("Unemployment Rate (%)") + xlab("Time") +
guides(colour=guide_legend(title="Model")) +
ggeasy::easy_center_title()
### Model Comparison between Deterministic and Stochastic Model
#Construct next 24 months forecast
f.arima4<- forecast(arima2,h=24)
f.det<- forecast(quad,h=24)
#Plot both stochastic and deterministic forecasted values on the same graph
autoplot(data.xts) +
autolayer(f.arima4, series="ARIMA (6,1,6) ") +
autolayer(f.det, series="Quadratic Trend ") +
ggtitle("Forecast of Stochastic VS Deterministic Time Series Model ") +
xlab("Year") + ylab("Unemployment Rate (%)") + xlab("Time") +
guides(colour=guide_legend(title="Model")) +
ggeasy::easy_center_title()
#Plot both stochastic and deterministic fitted values on the same graph
autoplot(data.xts) +
autolayer(fitted(arima2), series="ARIMA (6,1,6) ") +
autolayer(fitted(quad), series="Quadratic Trend ") +
ggtitle("Stochastic VS Deterministic Time Series Model ") +
xlab("Year") + ylab("Unemployment Rate (%)") + xlab("Time") +
guides(colour=guide_legend(title="Model")) +
ggeasy::easy_center_title()
#Plot original, ACF and density graph of residuals
checkresiduals(quad)
##
## Breusch-Godfrey test for serial correlation of order up to 24
##
## data: Residuals from Linear regression model
## LM test = 245.39, df = 24, p-value < 2.2e-16
checkresiduals(arima2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(6,1,6)
## Q* = 11.166, df = 12, p-value = 0.5147
##
## Model df: 12. Total lags used: 24
#Ljung-box Test
Box.test(residuals(arima2),lag=24, fitdf=2, type="Lj")
##
## Box-Ljung test
##
## data: residuals(arima2)
## X-squared = 11.166, df = 22, p-value = 0.9723
#Obtain R-sqaured value of
summary(quad)
##
## Call:
## tslm(formula = data.ts ~ trend + I(trend^2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.86813 -0.44435 0.04955 0.44574 1.66573
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.357e+00 1.167e-01 54.470 < 2e-16 ***
## trend -1.681e-02 2.026e-03 -8.296 5.73e-15 ***
## I(trend^2) 5.908e-05 7.377e-06 8.008 3.82e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6286 on 262 degrees of freedom
## Multiple R-squared: 0.2081, Adjusted R-squared: 0.202
## F-statistic: 34.41 on 2 and 262 DF, p-value: 5.363e-14
#Compute MSE value
mean(quad$residuals^2)
## [1] 0.3906036
mean(arima2$residuals^2)
## [1] 0.02839851
#12 months of unemployment rate forecast with ARIMA (6,1,6) model
f.arima2<- forecast(arima2,h=12)
f.arima2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Feb 2022 4.059439 3.836457 4.282421 3.718417 4.400461
## Mar 2022 3.961406 3.635639 4.287172 3.463189 4.459623
## Apr 2022 3.940772 3.531438 4.350105 3.314749 4.566794
## May 2022 3.766529 3.276226 4.256831 3.016675 4.516382
## Jun 2022 3.676886 3.113581 4.240190 2.815386 4.538386
## Jul 2022 3.811861 3.184279 4.439443 2.852057 4.771665
## Aug 2022 3.842327 3.131480 4.553174 2.755180 4.929474
## Sep 2022 3.915106 3.137201 4.693012 2.725403 5.104810
## Oct 2022 3.998474 3.159544 4.837404 2.715441 5.281507
## Nov 2022 4.127776 3.233297 5.022256 2.759788 5.495765
## Dec 2022 4.171859 3.229251 5.114466 2.730265 5.613453
## Jan 2023 4.106441 3.120734 5.092148 2.598932 5.613950
autoplot(f.arima2, main="ARIMA (6,1,6) Forecasted 12 Months Australia's Monthly Unemployment Rate ",ylab="Unemployment Rate (%)") +
ggeasy::easy_center_title()
#Compute forecasted values for the next 8 months
f.a2<- forecast(arima2,h=8)
f.a2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Feb 2022 4.059439 3.836457 4.282421 3.718417 4.400461
## Mar 2022 3.961406 3.635639 4.287172 3.463189 4.459623
## Apr 2022 3.940772 3.531438 4.350105 3.314749 4.566794
## May 2022 3.766529 3.276226 4.256831 3.016675 4.516382
## Jun 2022 3.676886 3.113581 4.240190 2.815386 4.538386
## Jul 2022 3.811861 3.184279 4.439443 2.852057 4.771665
## Aug 2022 3.842327 3.131480 4.553174 2.755180 4.929474
## Sep 2022 3.915106 3.137201 4.693012 2.725403 5.104810
f.a3<- forecast(arima3,h=8)
f.a3
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Feb 2022 4.114559 3.884891 4.344227 3.763312 4.465806
## Mar 2022 4.105233 3.763067 4.447399 3.581936 4.628530
## Apr 2022 4.254451 3.818675 4.690227 3.587989 4.920913
## May 2022 4.093089 3.566555 4.619624 3.287825 4.898354
## Jun 2022 3.989459 3.383173 4.595745 3.062224 4.916694
## Jul 2022 4.009839 3.332345 4.687332 2.973702 5.045975
## Aug 2022 4.009839 3.242616 4.777061 2.836473 5.183204
## Sep 2022 4.009839 3.162334 4.857343 2.713693 5.305984
#Test on predictive accuracy
dm.test(residuals(arima2),residuals(arima3) , alternative = "greater",h=1)
##
## Diebold-Mariano Test
##
## data: residuals(arima2)residuals(arima3)
## DM = -2.4983, Forecast horizon = 1, Loss function power = 2, p-value =
## 0.9935
## alternative hypothesis: greater