R Markdown

# Libraries
library(readxl)
library(tseries)    
## Warning: package 'tseries' was built under R version 4.4.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Database (from Excel)
pib_data <- read_excel("db_PIB2.xlsx")
# Converting the date column to Date format
pib_data$fecha <- as.Date(pib_data$fecha, format = "%d/%m/%y")

# Filter data from 2000 to 2024
pib_data <- pib_data[pib_data$fecha >= as.Date("2000-01-01") & 
                     pib_data$fecha <= as.Date("2024-12-31"),]
# Extracting the starting year and quarter
start_year <- as.numeric(format(min(pib_data$fecha), "%Y"))
start_month <- as.numeric(format(min(pib_data$fecha), "%m"))

# Converting month into quarter
start_quarter <- ifelse(start_month %in% c(1, 2, 3), 1,
                 ifelse(start_month %in% c(4, 5, 6), 2,
                 ifelse(start_month %in% c(7, 8, 9), 3, 4)))

# Creating the time series object with quarterly frequency
ts_data <- ts(pib_data$pib,
              start = c(start_year, start_quarter),
              frequency = 4)

#Import and Transform Data in RStudio: Upload the dataset to RStudio and convert it into a time series object using the as.ts() function.

# Converting to time series using as.ts()
ts_data = as.ts(ts_data)

# Displaying the time series
ts_data
##          Qtr1     Qtr2     Qtr3     Qtr4
## 2000 17562072 17786512 17980809 17778211
## 2001 17895081 17758108 17721837 17568749
## 2002 17459227 17649247 17813026 17851054
## 2003 17846064 17868090 17877093 18039385
## 2004 18277012 18520992 18509751 18707302
## 2005 18748764 18752841 18943014 19294067
## 2006 19676167 19841753 19892102 19963992
## 2007 20107079 20234691 20307737 20359607
## 2008 20354106 20505531 20521779 20197940
## 2009 18952309 18633041 19309769 19731182
## 2010 19814164 20029359 20219279 20386074
## 2011 20528681 20591212 20977196 21128881
## 2012 21330425 21461669 21516987 21684247
## 2013 21703435 21614636 21750398 21857318
## 2014 22001548 22281559 22319088 22508686
## 2015 22618510 22816784 23085376 23017589
## 2016 22992150 23094709 23296235 23556605
## 2017 23693966 23720108 23578936 23908388
## 2018 24208912 24134241 24230199 24186080
## 2019 24190818 24123758 24122015 23947538
## 2020 23660840 19187221 22164196 23122943
## 2021 23288550 23507958 23301496 23574312
## 2022 23901537 24184105 24386676 24671752
## 2023 24798272 24983768 25181152 25202781
## 2024 25255205 25227221 25524271 25275173
# Converting to millions (for a better and easier visualization)
ts_millions = ts_data / 1e6
log_ts = log(ts_millions)

1) Check for Stationarity: visual inspection, ADF test and apply differencing if needed

plot(log_ts,
     main = "Mexico Quarterly GDP (2000–2024)",
     ylab = "GDP (Millions of MXN)",
     xlab = "Time",
     col = "#0B3C5D",
     lwd = 2)

adf_original = adf.test(log_ts)
print(adf_original)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  log_ts
## Dickey-Fuller = -2.6697, Lag order = 4, p-value = 0.2992
## alternative hypothesis: stationary

#p-value was > 0.05 thus differencing needs to be applied

diff1 = diff(log_ts)
#Check for stationary again
#With the ADF Test
adf.test(diff1)
## Warning in adf.test(diff1): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff1
## Dickey-Fuller = -5.2017, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Plot differenced data
autoplot(diff1) +
  ggtitle("Differenced Series") +
  xlab("Time") + ylab("Differenced Value")

#Plot decomposition to see if there are other behaviors or patterns

decompose(log_ts)
## $x
##          Qtr1     Qtr2     Qtr3     Qtr4
## 2000 2.865742 2.878440 2.889305 2.877974
## 2001 2.884526 2.876842 2.874798 2.866122
## 2002 2.859868 2.870693 2.879930 2.882063
## 2003 2.881783 2.883016 2.883520 2.892557
## 2004 2.905644 2.918905 2.918298 2.928914
## 2005 2.931128 2.931345 2.941435 2.959798
## 2006 2.979408 2.987788 2.990323 2.993930
## 2007 3.001072 3.007399 3.011002 3.013553
## 2008 3.013283 3.020695 3.021487 3.005581
## 2009 2.941926 2.924936 2.960611 2.982200
## 2010 2.986397 2.997199 3.006637 3.014852
## 2011 3.021823 3.024864 3.043436 3.050641
## 2012 3.060134 3.066269 3.068843 3.076586
## 2013 3.077471 3.073371 3.079632 3.084536
## 2014 3.091113 3.103759 3.105442 3.113901
## 2015 3.118769 3.127496 3.139199 3.136259
## 2016 3.135153 3.139604 3.148292 3.159406
## 2017 3.165220 3.166323 3.160354 3.174229
## 2018 3.186721 3.183632 3.187600 3.185777
## 2019 3.185973 3.183197 3.183125 3.175866
## 2020 3.163821 2.954245 3.098478 3.140825
## 2021 3.147962 3.157339 3.148518 3.160158
## 2022 3.173943 3.185696 3.194037 3.205659
## 2023 3.210774 3.218226 3.226096 3.226954
## 2024 3.229032 3.227924 3.239630 3.229823
## 
## $seasonal
##              Qtr1         Qtr2         Qtr3         Qtr4
## 2000  0.001906817 -0.006841192  0.001241194  0.003693181
## 2001  0.001906817 -0.006841192  0.001241194  0.003693181
## 2002  0.001906817 -0.006841192  0.001241194  0.003693181
## 2003  0.001906817 -0.006841192  0.001241194  0.003693181
## 2004  0.001906817 -0.006841192  0.001241194  0.003693181
## 2005  0.001906817 -0.006841192  0.001241194  0.003693181
## 2006  0.001906817 -0.006841192  0.001241194  0.003693181
## 2007  0.001906817 -0.006841192  0.001241194  0.003693181
## 2008  0.001906817 -0.006841192  0.001241194  0.003693181
## 2009  0.001906817 -0.006841192  0.001241194  0.003693181
## 2010  0.001906817 -0.006841192  0.001241194  0.003693181
## 2011  0.001906817 -0.006841192  0.001241194  0.003693181
## 2012  0.001906817 -0.006841192  0.001241194  0.003693181
## 2013  0.001906817 -0.006841192  0.001241194  0.003693181
## 2014  0.001906817 -0.006841192  0.001241194  0.003693181
## 2015  0.001906817 -0.006841192  0.001241194  0.003693181
## 2016  0.001906817 -0.006841192  0.001241194  0.003693181
## 2017  0.001906817 -0.006841192  0.001241194  0.003693181
## 2018  0.001906817 -0.006841192  0.001241194  0.003693181
## 2019  0.001906817 -0.006841192  0.001241194  0.003693181
## 2020  0.001906817 -0.006841192  0.001241194  0.003693181
## 2021  0.001906817 -0.006841192  0.001241194  0.003693181
## 2022  0.001906817 -0.006841192  0.001241194  0.003693181
## 2023  0.001906817 -0.006841192  0.001241194  0.003693181
## 2024  0.001906817 -0.006841192  0.001241194  0.003693181
## 
## $trend
##          Qtr1     Qtr2     Qtr3     Qtr4
## 2000       NA       NA 2.880213 2.882361
## 2001 2.880348 2.877053 2.872490 2.868639
## 2002 2.868512 2.871146 2.875878 2.880158
## 2003 2.882147 2.883907 2.888202 2.895671
## 2004 2.904504 2.913396 2.921126 2.925866
## 2005 2.930313 2.937066 2.946962 2.960052
## 2006 2.973218 2.983596 2.990570 2.995730
## 2007 3.000766 3.005804 3.009783 3.012971
## 2008 3.015944 3.016258 3.006342 2.985452
## 2009 2.965873 2.955341 2.957977 2.972569
## 2010 2.987355 2.997190 3.005699 3.013586
## 2011 3.021644 3.030717 3.039980 3.049944
## 2012 3.058296 3.064715 3.070125 3.073180
## 2013 3.075416 3.077759 3.080458 3.085961
## 2014 3.092986 3.099883 3.107011 3.113435
## 2015 3.120622 3.127636 3.132479 3.136040
## 2016 3.138690 3.142720 3.149372 3.156470
## 2017 3.161318 3.164679 3.169219 3.174070
## 2018 3.179640 3.184489 3.185839 3.185691
## 2019 3.185077 3.183279 3.179271 3.147883
## 2020 3.108683 3.093722 3.087360 3.110764
## 2021 3.142406 3.151077 3.156742 3.163534
## 2022 3.172768 3.184146 3.194437 3.203108
## 2023 3.211181 3.217851 3.222795 3.226289
## 2024 3.229193 3.231244       NA       NA
## 
## $random
##               Qtr1          Qtr2          Qtr3          Qtr4
## 2000            NA            NA  7.850664e-03 -8.081054e-03
## 2001  2.270808e-03  6.630082e-03  1.066765e-03 -6.210307e-03
## 2002 -1.055027e-02  6.388448e-03  2.810960e-03 -1.788202e-03
## 2003 -2.270612e-03  5.950259e-03 -5.922930e-03 -6.806329e-03
## 2004 -7.665173e-04  1.235042e-02 -4.069127e-03 -6.453447e-04
## 2005 -1.092368e-03  1.120435e-03 -6.767505e-03 -3.947494e-03
## 2006  4.282998e-03  1.103382e-02 -1.488790e-03 -5.492556e-03
## 2007 -1.600628e-03  8.436214e-03 -2.190605e-05 -3.111325e-03
## 2008 -4.567777e-03  1.127816e-02  1.390395e-02  1.643527e-02
## 2009 -2.585396e-02 -2.356333e-02  1.392637e-03  5.938019e-03
## 2010 -2.864860e-03  6.850639e-03 -3.040908e-04 -2.426973e-03
## 2011 -1.727761e-03  9.881507e-04  2.214770e-03 -2.996755e-03
## 2012 -6.815011e-05  8.394934e-03 -2.523446e-03 -2.868351e-04
## 2013  1.475483e-04  2.453335e-03 -2.066712e-03 -5.118801e-03
## 2014 -3.780278e-03  1.071730e-02 -2.809807e-03 -3.226930e-03
## 2015 -3.759997e-03  6.701520e-03  5.479380e-03 -3.474742e-03
## 2016 -5.444106e-03  3.724571e-03 -2.321466e-03 -7.573825e-04
## 2017  1.995473e-03  8.485531e-03 -1.010665e-02 -3.534143e-03
## 2018  5.174356e-03  5.983945e-03  5.196583e-04 -3.607062e-03
## 2019 -1.011161e-03  6.759210e-03  2.612514e-03  2.428919e-02
## 2020  5.323130e-02 -1.326367e-01  9.877097e-03  2.636787e-02
## 2021  3.649009e-03  1.310271e-02 -9.465261e-03 -7.069349e-03
## 2022 -7.323482e-04  8.390884e-03 -1.641736e-03 -1.141929e-03
## 2023 -2.314251e-03  7.216839e-03  2.059690e-03 -3.028175e-03
## 2024 -2.067795e-03  3.521262e-03            NA            NA
## 
## $figure
## [1]  0.001906817 -0.006841192  0.001241194  0.003693181
## 
## $type
## [1] "additive"
## 
## attr(,"class")
## [1] "decomposed.ts"
plot(decompose(log_ts))

2) Analyze ACF and PACF to identify potential model structures and appropiate lag values

# ACF and PACF
par(mfrow = c(1,2))
acf(diff1, main = "ACF of Differenced Series")
pacf(diff1, main = "PACF of Differenced Series")

par(mfrow = c(1,1))

Fit the candidate models - Manual

#Models
model1=Arima(log_ts, order = c(0,1,0), include.drift = TRUE)
model2=Arima(log_ts, order = c(0,1,1), include.drift = TRUE)
model3=Arima(log_ts, order = c(0,1,2), include.drift = TRUE)
model4=Arima(log_ts, order = c(1,1,0), include.drift = TRUE)
model5=Arima(log_ts, order = c(1,1,1), include.drift = TRUE)
model6=Arima(log_ts, order = c(1,1,2), include.drift = TRUE)

#Models
model1
## Series: log_ts 
## ARIMA(0,1,0) with drift 
## 
## Coefficients:
##        drift
##       0.0037
## s.e.  0.0028
## 
## sigma^2 = 0.0007925:  log likelihood = 213.48
## AIC=-422.95   AICc=-422.83   BIC=-417.76
model2
## Series: log_ts 
## ARIMA(0,1,1) with drift 
## 
## Coefficients:
##           ma1   drift
##       -0.3388  0.0037
## s.e.   0.1155  0.0018
## 
## sigma^2 = 0.0007404:  log likelihood = 217.29
## AIC=-428.59   AICc=-428.34   BIC=-420.8
model3
## Series: log_ts 
## ARIMA(0,1,2) with drift 
## 
## Coefficients:
##           ma1      ma2   drift
##       -0.3015  -0.1552  0.0037
## s.e.   0.0987   0.1019  0.0015
## 
## sigma^2 = 0.0007301:  log likelihood = 218.47
## AIC=-428.93   AICc=-428.5   BIC=-418.55
model4
## Series: log_ts 
## ARIMA(1,1,0) with drift 
## 
## Coefficients:
##           ar1   drift
##       -0.2225  0.0037
## s.e.   0.0976  0.0023
## 
## sigma^2 = 0.0007604:  log likelihood = 216.01
## AIC=-426.02   AICc=-425.76   BIC=-418.23
model5
## Series: log_ts 
## ARIMA(1,1,1) with drift 
## 
## Coefficients:
##          ar1      ma1   drift
##       0.6935  -1.0000  0.0038
## s.e.  0.0743   0.0325  0.0003
## 
## sigma^2 = 0.0006815:  log likelihood = 220.5
## AIC=-433   AICc=-432.58   BIC=-422.62
model6
## Series: log_ts 
## ARIMA(1,1,2) with drift 
## 
## Coefficients:
##          ar1      ma1     ma2   drift
##       0.7744  -1.1547  0.1547  0.0038
## s.e.  0.0987   0.1585  0.1557  0.0003
## 
## sigma^2 = 0.0006837:  log likelihood = 220.98
## AIC=-431.95   AICc=-431.31   BIC=-418.98
#Compare models with AIC
model_comparison <- data.frame(
  Model = c("ARIMA(0,1,0)",
            "ARIMA(0,1,1)",
            "ARIMA(0,1,2)",
            "ARIMA(1,1,0)",
            "ARIMA(1,1,1)",
            "ARIMA(1,1,2)"),
  AIC = c(AIC(model1),
          AIC(model2),
          AIC(model3),
          AIC(model4),
          AIC(model5),
          AIC(model6))
)

model_comparison <- model_comparison[order(model_comparison$AIC), ]
print(model_comparison)
##          Model       AIC
## 5 ARIMA(1,1,1) -433.0030
## 6 ARIMA(1,1,2) -431.9508
## 3 ARIMA(0,1,2) -428.9301
## 2 ARIMA(0,1,1) -428.5881
## 4 ARIMA(1,1,0) -426.0156
## 1 ARIMA(0,1,0) -422.9517

#Auto.arima models

fit_arima <- auto.arima(log_ts, seasonal = TRUE)
summary(fit_arima)
## Series: log_ts 
## ARIMA(0,1,2) with drift 
## 
## Coefficients:
##           ma1      ma2   drift
##       -0.3015  -0.1552  0.0037
## s.e.   0.0987   0.1019  0.0015
## 
## sigma^2 = 0.0007301:  log likelihood = 218.47
## AIC=-428.93   AICc=-428.5   BIC=-418.55
## 
## Training set error measures:
##                        ME       RMSE        MAE          MPE      MAPE     MASE
## Training set 5.943548e-05 0.02647491 0.01126389 -0.003447065 0.3725021 0.378671
##                     ACF1
## Training set 0.004610251
auto.arima(diff1)
## Series: diff1 
## ARIMA(0,0,2) with non-zero mean 
## 
## Coefficients:
##           ma1      ma2    mean
##       -0.3015  -0.1552  0.0037
## s.e.   0.0987   0.1019  0.0015
## 
## sigma^2 = 0.00073:  log likelihood = 218.47
## AIC=-428.93   AICc=-428.5   BIC=-418.55
auto.arima(log_ts)
## Series: log_ts 
## ARIMA(0,1,2) with drift 
## 
## Coefficients:
##           ma1      ma2   drift
##       -0.3015  -0.1552  0.0037
## s.e.   0.0987   0.1019  0.0015
## 
## sigma^2 = 0.0007301:  log likelihood = 218.47
## AIC=-428.93   AICc=-428.5   BIC=-418.55
#The output of the 3 indicate that the best model is ARIMA(0,1,2) with drift

3) Residual diagnosis with Ljung-Box

checkresiduals(model5)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1) with drift
## Q* = 1.7891, df = 6, p-value = 0.938
## 
## Model df: 2.   Total lags used: 8
Box.test(model5$residuals, type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  model5$residuals
## X-squared = 0.42169, df = 1, p-value = 0.5161
#Constant variance -- residual plot over time 
plot(model5$residuals,
     main = "Residuals Over Time",
     ylab = "Residuals",
     xlab = "Time",
     col  = "#0B3C5D",
     lwd  = 1.5)
abline(h = 0, col = "red", lty = 2)

#Forecasting #Forecast with data until 2024 and forecast 2025, then prove it against the actual values of the 2025 GDP

gdp_forecast <- forecast(model5, h = 4)
print(gdp_forecast)
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2025 Q1       3.237242 3.203628 3.270857 3.185833 3.288651
## 2025 Q2       3.243559 3.202471 3.284646 3.180720 3.306397
## 2025 Q3       3.249110 3.204759 3.293461 3.181281 3.316939
## 2025 Q4       3.254131 3.208216 3.300046 3.183910 3.324352
# Plot forecast
plot(gdp_forecast, main = "Forecast for 2025 GDP")

autoplot(gdp_forecast) +
  autolayer(log_ts, color = "black", size = 1) +
  theme_classic()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the forecast package.
##   Please report the issue at <https://github.com/robjhyndman/forecast/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

exp(gdp_forecast$mean)
##          Qtr1     Qtr2     Qtr3     Qtr4
## 2025 25.46340 25.62475 25.76740 25.89710
forecast_values <- exp(gdp_forecast$mean)
actual_values <- c(25.36062, 25.48931, 25.50637, 25.72661)

comparison_table <- data.frame(
  Quarter = c("Q1 2025","Q2 2025","Q3 2025","Q4 2025"),
  
  Forecast = exp(gdp_forecast$mean),
  
  Lo80 = exp(gdp_forecast$lower[,1]),
  Hi80 = exp(gdp_forecast$upper[,1]),
  
  Lo95 = exp(gdp_forecast$lower[,2]),
  Hi95 = exp(gdp_forecast$upper[,2]),
  
  Actual = actual_values
)

# Compute error
comparison_table$Deviation <- comparison_table$Forecast - comparison_table$Actual

print(comparison_table)
##   Quarter Forecast     Lo80     Hi80     Lo95     Hi95   Actual Deviation
## 1 Q1 2025 25.46340 24.62169 26.33389 24.18743 26.80669 25.36062 0.1027821
## 2 Q2 2025 25.62475 24.59322 26.69954 24.06408 27.28664 25.48931 0.1354390
## 3 Q3 2025 25.76740 24.64956 26.93592 24.07758 27.57581 25.50637 0.2610279
## 4 Q4 2025 25.89710 24.73491 27.11389 24.14095 27.78100 25.72661 0.1704877