We install all the packages that we will need.
library(xts)
library(readxl)
library(quantmod)
library(zoo)
library(tseries)
library(forecast)
library(astsa)
library(lmtest)
library(rugarch)
library(moments)
library(astsa)
library(PerformanceAnalytics)
library(lmtest)
First we download the data of the real GDP of US and México
getSymbols.FRED(Symbols = c("GDPC1", "NAEXKP01MXQ189S"), env = (.GlobalEnv), return.class = "xts")
## [1] "GDPC1" "NAEXKP01MXQ189S"
Then in order to have the 2 variables in one dataset I merge the data and use the na.omit function to have the same dates.
dataset <- na.omit(merge(GDPC1, NAEXKP01MXQ189S))
names(dataset)[names(dataset) == "GDPC1"] <- "GDPUS"
names(dataset)[names(dataset) == "NAEXKP01MXQ189S"] <- "GDPMXX"
Now in order to see the the growth of the GDP since 1993 we create a index for the two real GDP.
dataset$indexusa = dataset$GDPUS /as.numeric( dataset$GDPUS[1])
dataset$indexmxx = dataset$GDPMXX /as.numeric( dataset$GDPMXX[1])
Then we plot the data.
plot(dataset$indexusa)
plot(dataset$indexmxx)
Now that we visualize our data we can see the growth of the US economy is much bigger than we expected.
So now we start to model our data in order to make a forecast.
We start by checking stationarity of the series
dataset$lngdpus <- log(dataset$GDPUS)
plot(diff(dataset$lngdpus, lag=4))
By looking at the graph we can not be sure of the stationarity of the series so we apply the Dicky-Fuller test.
adf.test(na.omit(diff(dataset$lngdpus,lag=4)),alternative="stationary",k=0)
## Warning in adf.test(na.omit(diff(dataset$lngdpus, lag = 4)), alternative =
## "stationary", : p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(diff(dataset$lngdpus, lag = 4))
## Dickey-Fuller = -4.0975, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary
Since the p-value is less than 0.05 we can conclude that the series of the real GDP is stationarity. And we can start by lookig at the ACF and the PACF to start modeling our ARIMA-SARIMA.
acf2(diff(dataset$lngdpus,lag=4,))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.78 0.59 0.40 0.23 0.16 0.12 0.09 0.06 0.05 0.03 0.02 0.01 0.01
## PACF 0.78 -0.03 -0.14 -0.06 0.12 0.03 -0.06 -0.02 0.06 -0.01 -0.03 0.02 0.02
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21]
## ACF 0.01 0.02 0.04 0.02 0.01 -0.01 -0.06 -0.08
## PACF 0.00 0.02 0.02 -0.04 0.00 -0.02 -0.10 0.01
As we can look, the ACF plot show a slow decline of the autocorrelations, and the PACF shows a fast decline of autocorrelations too. Since this, we can assume this si going to be a AR signature.
But after doing the ARIMA-SARIMA we create the COVID variable to use it as a explanatory variable.
dataset$gdpccus <- diff(log(dataset$GDPUS),lag=4)
dataset$covidusdummy <- ifelse( dataset$gdpccus >= 0 ,0,1)
tworstus <- as.numeric((min(na.omit(dataset$gdpccus))))
dataset$covidusshock <- dataset$covidusdummy * (dataset$gdpccus/tworstus)
Now that we create the COVID shocks we can start doing the ARIMA-SARIMA model of the real GDP indorder to do a forecast to it.
And we can start by applying ARIMA-SARIMA(1,0,0)(0,1,0)
m1 <- Arima(dataset$GDPUS, order = c(1,0,0),
seasonal = list(order=c(0,1,1),period=4),
include.constant = TRUE,
xreg = dataset$covidusshock,
lambda = 0)
coeftest(m1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.9910377 0.0114190 86.7886 < 2.2e-16 ***
## sma1 -0.8304509 0.0910460 -9.1212 < 2.2e-16 ***
## drift 0.0059662 0.0016745 3.5630 0.0003667 ***
## covidusshock -0.0905165 0.0045383 -19.9449 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
After doing a interpretation we have to check if the residuals looks more like a withe noise.
acf2(m1$residuals)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.25 0.20 0.16 0.15 0.18 0.15 0.16 -0.12 -0.04 0.06 0.01 0.01 0.00
## PACF 0.25 0.14 0.09 0.08 0.10 0.06 0.07 -0.26 -0.05 0.08 -0.01 0.00 0.03
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21]
## ACF -0.01 -0.06 -0.04 0.01 -0.08 0.00 -0.15 -0.05
## PACF 0.01 -0.02 -0.07 0.00 -0.05 0.05 -0.16 0.04
There are a lot of lags that are significant in the PACF so we decided to add a new MA term and an AR term in the model
m1_1 <- Arima(dataset$indexusa, order = c(2,0,0),
seasonal = list(order=c(0,1,1),period=4),
include.constant = TRUE,
xreg = dataset$covidusshock,
lambda = 0)
coeftest(m1_1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 1.2292942 0.1115216 11.0229 < 2.2e-16 ***
## ar2 -0.2370517 0.1097641 -2.1596 0.0308 *
## sma1 -0.9064551 0.1398486 -6.4817 9.070e-11 ***
## drift 0.0058620 0.0012032 4.8719 1.105e-06 ***
## covidusshock -0.0901093 0.0041789 -21.5629 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We check the residual to see a behave of withe noise.
acf2(m1_1$residuals,max.lag = 24)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF -0.04 0.12 0.11 0.15 0.16 0.09 0.18 -0.18 -0.04 0.08 -0.02 0.00 -0.02
## PACF -0.04 0.12 0.12 0.15 0.16 0.07 0.14 -0.24 -0.19 0.01 -0.03 0.02 0.06
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF 0.00 -0.06 -0.05 0.03 -0.10 0.04 -0.17 0.01 -0.13 0.04 -0.02
## PACF 0.06 0.03 -0.09 -0.06 -0.11 0.03 -0.16 0.03 -0.04 0.12 0.06
After doing a Interpretation we decide to check the AIC Crieiria of the 2 models
m1$aic
## [1] -761.6295
m1_1$aic
## [1] -764.6881
Know that we are sure about our model we can do a Interpretation of the following model: ARIMA-SARIMA(1,0,0)(0,1,1)
coeftest(m1_1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 1.2292942 0.1115216 11.0229 < 2.2e-16 ***
## ar2 -0.2370517 0.1097641 -2.1596 0.0308 *
## sma1 -0.9064551 0.1398486 -6.4817 9.070e-11 ***
## drift 0.0058620 0.0012032 4.8719 1.105e-06 ***
## covidusshock -0.0901093 0.0041789 -21.5629 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
INTERPRETATION
Now we can forecast the real GDP of USA
forecast_usgdp <- forecast(m1_1,xreg=dataset$lngdpus, h=16)
## Warning in forecast.forecast_ARIMA(m1_1, xreg = dataset$lngdpus, h = 16): xreg
## contains different column names from the xreg used in training. Please check
## that the regressors are in the same order.
autoplot(forecast_usgdp)
We start by checking stationarity of the series
dataset$lngdpmxx <- log(dataset$GDPMXX)
plot(diff(dataset$lngdpmxx, lag=4))
By looking at the graph we can not be sure of the stationarity of the series so we apply the Dicky-Fuller test.
adf.test(na.omit(diff(dataset$lngdpmxx,lag=4)),alternative="stationary",k=0)
##
## Augmented Dickey-Fuller Test
##
## data: na.omit(diff(dataset$lngdpmxx, lag = 4))
## Dickey-Fuller = -4.0126, Lag order = 0, p-value = 0.0112
## alternative hypothesis: stationary
Since the p-value is less than 0.05 we can conclude that the series of the real GDP is stationarity.
acf2(diff(dataset$lngdpmxx,lag=4,))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## ACF 0.74 0.45 0.18 -0.04 -0.06 -0.06 -0.05 -0.05 -0.08 -0.09 -0.09 -0.09
## PACF 0.74 -0.21 -0.16 -0.10 0.20 -0.06 -0.08 -0.03 -0.02 0.00 -0.04 -0.03
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21]
## ACF -0.06 -0.03 0.00 0.00 -0.05 -0.10 -0.16 -0.16 -0.11
## PACF 0.03 0.01 0.01 -0.08 -0.07 -0.05 -0.05 0.02 0.00
As we can look, the ACF plot show a slow decline of the autocorrelations, and the PACF shows a fast decline of autocorrelations too. Since this, we can assume this si going to be a AR signature.
But after doing the ARIMA-SARIMA we create the COVID variable to use it as a explanatory variable.
dataset$gdpccmxx <- diff(log(dataset$GDPMXX),lag=4)
dataset$covidmxxdummy <- ifelse( dataset$gdpccmxx >= 0 ,0,1)
tworstus <- as.numeric((min(na.omit(dataset$gdpccmxx))))
dataset$covidmxxshock <- dataset$covidmxxdummy * (dataset$gdpccus/tworstus)
Now that we create the COVID shocks we can start doing the ARIMA-SARIMA model of the real GDP indorder to do a forecast to it.
And we can start by applying ARIMA-SARIMA(1,0,0)(0,1,0) with the explanatories variables of the COVID shock and the GDP of USA
m2 <- Arima(dataset$GDPMXX, order = c(1,0,0),
seasonal = list(order=c(0,1,0),period=4),
include.constant = TRUE,
xreg = dataset[,c("covidmxxshock", "lngdpus")],
lambda = 0)
coeftest(m2)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.7942701 0.0605696 13.1133 < 2.2e-16 ***
## drift -0.0071008 0.0021087 -3.3674 0.0007587 ***
## covidmxxshock 0.0913434 0.0337555 2.7060 0.0068094 **
## lngdpus 1.9593181 0.1865043 10.5055 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We check the residual to see a behave of withe noise.
acf2(m2$residuals,max.lag = 24)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.1 -0.10 -0.09 -0.35 0.08 0.21 0.06 -0.09 -0.06 -0.06 -0.02 0.02 0.09
## PACF 0.1 -0.11 -0.07 -0.35 0.15 0.12 0.01 -0.21 0.08 0.03 -0.05 -0.15 0.16
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF -0.05 0.10 0.15 -0.03 0.09 -0.10 -0.15 0.10 0.04 0.04 -0.01
## PACF -0.06 0.15 0.07 0.08 0.07 -0.05 -0.08 0.14 -0.01 0.00 -0.10
There are a lot of lags that are significant in the PACF so we decided to add a new MA term.
m2_1 <- Arima(dataset$indexmxx, order = c(2,0,0),
seasonal = list(order=c(0,1,0),period=4),
include.constant = TRUE,
xreg = dataset[,c("covidmxxshock", "lngdpus")],
lambda = 0)
coeftest(m2_1)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.9028487 0.0995691 9.0676 < 2.2e-16 ***
## ar2 -0.1514333 0.1109117 -1.3654 0.1721430
## drift -0.0065172 0.0018784 -3.4695 0.0005215 ***
## covidmxxshock 0.0823720 0.0340136 2.4217 0.0154464 *
## lngdpus 1.8951274 0.1921506 9.8627 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We check the residual to see a behave of withe noise.
acf2(m2_1$residuals,max.lag = 24)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF 0.05 0.05 0.12 -0.20 0.16 0.14 0.00 -0.12 -0.04 -0.04 -0.01 0.02 0.09
## PACF 0.05 0.05 0.11 -0.22 0.18 0.13 0.02 -0.25 0.04 0.02 -0.01 -0.11 0.19
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF -0.06 0.11 0.19 -0.01 0.18 -0.02 -0.11 0.12 0.03 0.02 0.00
## PACF -0.03 0.13 0.11 0.06 0.06 -0.05 -0.12 0.10 0.02 0.00 -0.08
We calculate the aic criteria to see which model we should use
m2$aic
## [1] -558.8584
m2_1$aic
## [1] -558.7503
Now we forecast the real GDP of México
forecast_mxxgdp <- forecast(m2_1,xreg=dataset[,c("covidmxxshock", "lngdpus")])
## Warning in forecast.forecast_ARIMA(m2_1, xreg = dataset[, c("covidmxxshock", :
## Upper prediction intervals are not finite.
autoplot(forecast_mxxgdp)